In [1]:
import os

import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

from utils.data import *
from utils.metrics import MAPE, WAPE, MAE
from utils.dl import *

In [2]:
# Root folder of the dataset. Keep the trailing slash: later cells build sub-paths by
# plain string concatenation, e.g. f"{dataset_path}synthetic/QuantGAN/".
dataset_path = "data/huge_stock_market_dataset/"

In [3]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# --- windowing arguments forwarded to create_ts_dl ---
lags = 32      # also reused below as the model's `input_size`
horizon = 8    # also reused below as the model's `output_size`
stride = 1

# --- loader / split arguments forwarded to create_ts_dl ---
batch_size = 256
val_size = 0.15
test_size = 0.0
drop_last = False
features = 1   # NOTE(review): not referenced by any other visible cell

# --- training ---
epochs = 200
verbose = False  # forwarded as `print_info` to model.train

# Keyword arguments handed to the TCN constructor through Model.set_model.
model_params = {
    "num_channels": [128] * 4,
    "kernel_size": 2,
    "dropout": 0.25,
    "output_size": horizon,
    "input_size": lags,
}

cuda:0


In [4]:
# Baseline: train one TCN per real time series and record its train/validation loss.
# os.path.join avoids the doubled separator that f"{dataset_path}/selected.csv"
# produced (dataset_path already ends with "/").
ts_iterator = get_hsm_dataset(dataset_path, selected_files=os.path.join(dataset_path, "selected.csv"))

results = []
for time_series in tqdm(ts_iterator):
    # The "Close" column is passed both as the (single-column) input frame and as the
    # target series, with data_preprocess="log_returns".
    train_dl, val_dl, test_dl, X_scaler, y_scaler = create_ts_dl(
        time_series[["Close"]], time_series["Close"],
        lags=lags, horizon=horizon, stride=stride,
        batch_size=batch_size, device=device, data_preprocess="log_returns",
        val_size=val_size, test_size=test_size, drop_last=drop_last,
    )

    # A new model is built with the same seed for every series.
    model = Model(seed=0, device=device)
    model.set_model(TCN, **model_params)
    optim_params = {'params': model.model.parameters(), 'lr': 4e-4}
    model.set_optim(torch.optim.AdamW, **optim_params)
    model.set_criterion(MAE)

    model.train(train_dl, epochs=epochs, print_info=verbose, agg_loss="mean")
    results.append({
        "train": model.eval(train_dl, agg_loss="mean"),
        "val": model.eval(val_dl, agg_loss="mean"),
    })

    # Drop the references first so that empty_cache() can actually return the memory.
    del model, train_dl, val_dl, test_dl
    torch.cuda.empty_cache()

0it [00:00, ?it/s]Global seed set to 0
1it [00:12, 12.19s/it]Global seed set to 0
2it [00:18,  8.76s/it]Global seed set to 0
3it [00:24,  7.56s/it]Global seed set to 0
4it [00:30,  6.98s/it]Global seed set to 0
5it [00:34,  5.95s/it]Global seed set to 0
6it [00:47,  8.19s/it]Global seed set to 0
7it [00:57,  8.67s/it]Global seed set to 0
8it [01:06,  9.01s/it]Global seed set to 0
9it [01:18,  9.88s/it]Global seed set to 0
10it [01:28,  9.89s/it]Global seed set to 0
11it [01:46, 12.25s/it]Global seed set to 0
12it [02:03, 13.91s/it]Global seed set to 0
13it [02:21, 15.16s/it]Global seed set to 0
14it [02:38, 15.48s/it]Global seed set to 0
15it [02:57, 16.62s/it]Global seed set to 0
16it [03:16, 17.52s/it]Global seed set to 0
17it [03:38, 18.71s/it]Global seed set to 0
18it [04:00, 19.72s/it]Global seed set to 0
19it [04:21, 20.22s/it]Global seed set to 0
20it [04:43, 20.56s/it]Global seed set to 0
21it [05:57, 36.70s/it]Global seed set to 0
22it [07:14, 48.84s/it]Global seed set to 0
23

In [5]:
# Show the baseline losses: one {"train": ..., "val": ...} dict per time series.
results

[{'train': 0.07601827010512352, 'val': 0.09294126182794571},
 {'train': 0.03591239328185717, 'val': 0.05265411362051964},
 {'train': 0.006942708821346362, 'val': 0.005144833587110043},
 {'train': 0.009543801036973795, 'val': 0.010662950575351715},
 {'train': 0.01589103927835822, 'val': 0.013669661246240139},
 {'train': 0.026791866247852642, 'val': 0.027336619794368744},
 {'train': 0.02450627014040947, 'val': 0.036064375191926956},
 {'train': 0.0166728213429451, 'val': 0.015392552129924297},
 {'train': 0.008599746196220318, 'val': 0.0061883225571364164},
 {'train': 0.007930880784988404, 'val': 0.0070607042871415615},
 {'train': 0.047204966760343976, 'val': 0.039447009563446045},
 {'train': 0.034146616235375404, 'val': 0.021188664250075817},
 {'train': 0.03233239323728614, 'val': 0.032122411765158176},
 {'train': 0.011525885667651892, 'val': 0.006635635392740369},
 {'train': 0.017489527072757482, 'val': 0.01032670121639967},
 {'train': 0.03790958225727081, 'val': 0.027314666658639908},
 

In [7]:
# A forward slash is accepted on Windows and POSIX alike, so the file always lands
# inside the results/ directory (a backslash is a plain filename character on POSIX).
pd.DataFrame(results).to_csv("results/pure_TCN.csv", index=False)

# Augmentation with QuantGAN synthetic data

In [10]:
class CombinedDataLoader:
    """Present several data loaders as a single iterable.

    The wrapped loaders are consumed one after another, in the order they were
    passed in; items coming from different loaders are never interleaved.
    """

    def __init__(self, *dls):
        # Kept as the tuple of positional arguments, in call order.
        self.dls = dls

    def __len__(self):
        """Return the summed length of all wrapped loaders (0 when there are none)."""
        total = 0
        for loader in self.dls:
            total += len(loader)
        return total

    def __iter__(self):
        """Yield every item of every wrapped loader, loader by loader."""
        for loader in self.dls:
            yield from loader


def _build_synth_loaders(synth_time_series):
    """Create one training loader per slice along the first axis of ``synth_time_series``.

    Each slice is handed to ``create_ts_dl`` as a single-column input (``reshape(-1, 1)``)
    with its flattened copy as the target, with ``data_preprocess=None`` and no
    validation/test split. Only the first object returned by ``create_ts_dl`` is kept.
    Doing this in a helper keeps the per-slice temporaries out of the caller's scope.
    """
    loaders = []
    for synth_series in synth_time_series:
        synth_dl, *_ = create_ts_dl(
            synth_series.reshape(-1, 1), synth_series.flatten(),
            lags=lags, horizon=horizon, stride=stride,
            batch_size=batch_size, device=device, data_preprocess=None,
            val_size=0, test_size=0, drop_last=drop_last,
        )
        loaders.append(synth_dl)
    return loaders


def train_synth(synthetic_path, epochs=2, include_real=False):
    """Train one TCN per selected series on synthetic data and score it on the real data.

    For the series at position ``i`` of the dataset iterator, the array stored in
    ``selected{i}.npy`` under ``synthetic_path`` is loaded and every slice along its
    first axis becomes its own training loader. A newly built model (seed 0) is
    trained on those loaders and then evaluated on the real train and validation
    loaders. Windowing, loader, device and model settings are read from the
    notebook-level configuration variables.

    Args:
        synthetic_path: Directory holding the ``selected{i}.npy`` files; a trailing
            separator is optional.
        epochs: Number of epochs passed to ``model.train``. Defaults to 2, the value
            that was previously hard-coded in this function (it is independent of
            the notebook-level ``epochs``).
        include_real: If True, the real training loader is placed in front of the
            synthetic loaders; if False (default), training uses the synthetic
            loaders only.

    Returns:
        list[dict]: one ``{"train": ..., "val": ...}`` entry per series, holding what
        ``model.eval(..., agg_loss="mean")`` returns on the real train and
        validation loaders.
    """
    ts_iterator = get_hsm_dataset(dataset_path, selected_files=os.path.join(dataset_path, "selected.csv"))

    results = []
    # tqdm wraps the data source itself and enumerate wraps tqdm, as the tqdm docs
    # recommend over tqdm(enumerate(...)).
    for ts_index, time_series in enumerate(tqdm(ts_iterator)):
        train_dl, val_dl, test_dl, _x_scaler, _y_scaler = create_ts_dl(
            time_series[["Close"]], time_series["Close"],
            lags=lags, horizon=horizon, stride=stride,
            batch_size=batch_size, device=device, data_preprocess="log_returns",
            val_size=val_size, test_size=test_size, drop_last=drop_last,
        )

        # NOTE(review): assumes the numeric suffix of the .npy files matches the order
        # in which get_hsm_dataset yields the series - confirm against the generator
        # that produced the files.
        synth_time_series = np.load(os.path.join(synthetic_path, f"selected{ts_index}.npy"))
        synth_dls = _build_synth_loaders(synth_time_series)

        model = Model(seed=0, device=device)
        model.set_model(TCN, **model_params)
        model.set_optim(torch.optim.AdamW, params=model.model.parameters(), lr=4e-4)
        model.set_criterion(MAE)

        train_sources = (train_dl, *synth_dls) if include_real else tuple(synth_dls)
        cdl = CombinedDataLoader(*train_sources)
        model.train(cdl, epochs=epochs, print_info=verbose, agg_loss="mean")
        results.append({
            "train": model.eval(train_dl, agg_loss="mean"),
            "val": model.eval(val_dl, agg_loss="mean"),
        })

        # Drop every reference to the loaders and the model before emptying the CUDA
        # cache; objects that are still referenced cannot be released by it.
        del model, cdl, train_sources, synth_dls, train_dl, val_dl, test_dl
        torch.cuda.empty_cache()
    return results

In [7]:
# Train on the QuantGAN synthetic series and evaluate on the real train/val splits.
# The path must end with "/" because it is concatenated directly with the file name.
results = train_synth(f"{dataset_path}synthetic/QuantGAN/")
results

[{'train': 0.10775789991021156, 'val': 0.08761278539896011},
 {'train': 0.04027101770043373, 'val': 0.05313824117183685},
 {'train': 0.007714773993939161, 'val': 0.0056333988904953},
 {'train': 0.010017184230188528, 'val': 0.010736174881458282},
 {'train': 0.016770757269114256, 'val': 0.012860503979027271},
 {'train': 0.029067910586794216, 'val': 0.02728160098195076},
 {'train': 0.02904174029827118, 'val': 0.03444803133606911},
 {'train': 0.017426739819347857, 'val': 0.015151316300034523},
 {'train': 0.009081647576143345, 'val': 0.006351890275254846},
 {'train': 0.008119670208543539, 'val': 0.006900529842823744},
 {'train': 0.053747433341211744, 'val': 0.03965535759925842},
 {'train': 0.03879339247941971, 'val': 0.020823094062507153},
 {'train': 0.03564218493799368, 'val': 0.03145353216677904},
 {'train': 0.01201541442424059, 'val': 0.0064820037223398685},
 {'train': 0.018960770405828952, 'val': 0.010626457631587982},
 {'train': 0.04312667790800333, 'val': 0.02642847504466772},
 {'trai

In [8]:
# A forward slash is accepted on Windows and POSIX alike, so the file always lands
# inside the results/ directory (a backslash is a plain filename character on POSIX).
pd.DataFrame(results).to_csv("results/QuantGAN_synth_TCN.csv", index=False)

# Augmentation with FourierFlow synthetic data

In [7]:
# Train on the FourierFlow synthetic series and evaluate on the real train/val splits.
# The path must end with "/" because it is concatenated directly with the file name.
results = train_synth(f"{dataset_path}synthetic/FourierFlow/")
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:34, 34.76s/it]Global seed set to 0
2it [00:55, 26.41s/it]Global seed set to 0
3it [01:16, 24.14s/it]Global seed set to 0
4it [01:39, 23.66s/it]Global seed set to 0
5it [02:00, 22.46s/it]Global seed set to 0
6it [02:21, 22.06s/it]Global seed set to 0
7it [02:43, 22.16s/it]Global seed set to 0
8it [03:06, 22.28s/it]Global seed set to 0
9it [03:27, 21.92s/it]Global seed set to 0
10it [03:47, 21.52s/it]Global seed set to 0
11it [04:07, 20.80s/it]Global seed set to 0
12it [04:26, 20.46s/it]Global seed set to 0
13it [04:47, 20.63s/it]Global seed set to 0
14it [05:07, 20.27s/it]Global seed set to 0
15it [05:27, 20.11s/it]Global seed set to 0
16it [05:46, 19.99s/it]Global seed set to 0
17it [06:06, 19.81s/it]Global seed set to 0
18it [06:27, 20.25s/it]Global seed set to 0
19it [06:46, 20.04s/it]Global seed set to 0
20it [07:06, 19.98s/it]Global seed set to 0
21it [07:27, 20.12s/it]Global seed set to 0
22it [07:47, 20.21s/it]Global seed set to 0
23

In [9]:
# A forward slash is accepted on Windows and POSIX alike, so the file always lands
# inside the results/ directory (a backslash is a plain filename character on POSIX).
pd.DataFrame(results).to_csv("results/FourierFlow_synth_TCN.csv", index=False)

# Augmentation with RealNVP synthetic data

In [16]:
# Train on the RealNVP synthetic series and evaluate on the real train/val splits.
# The path must end with "/" because it is concatenated directly with the file name.
results = train_synth(f"{dataset_path}synthetic/RealNVP/")
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:25, 25.05s/it]Global seed set to 0
2it [00:44, 21.77s/it]Global seed set to 0
3it [01:04, 20.89s/it]Global seed set to 0
4it [01:26, 21.49s/it]Global seed set to 0
5it [01:48, 21.63s/it]Global seed set to 0
6it [02:12, 22.22s/it]Global seed set to 0
7it [02:36, 22.82s/it]Global seed set to 0
8it [02:57, 22.49s/it]Global seed set to 0
9it [03:19, 22.25s/it]Global seed set to 0
10it [03:40, 21.81s/it]Global seed set to 0
11it [04:01, 21.66s/it]Global seed set to 0
12it [04:22, 21.29s/it]Global seed set to 0
13it [04:44, 21.48s/it]Global seed set to 0
14it [05:04, 21.27s/it]Global seed set to 0
15it [05:23, 20.54s/it]Global seed set to 0
16it [05:40, 19.39s/it]Global seed set to 0
17it [05:57, 18.60s/it]Global seed set to 0
18it [06:15, 18.39s/it]Global seed set to 0
19it [06:34, 18.60s/it]Global seed set to 0
20it [06:53, 18.94s/it]Global seed set to 0
21it [07:10, 18.35s/it]Global seed set to 0
22it [07:29, 18.55s/it]Global seed set to 0
23

In [18]:
# A forward slash is accepted on Windows and POSIX alike, so the file always lands
# inside the results/ directory (a backslash is a plain filename character on POSIX).
pd.DataFrame(results).to_csv("results/RealNVP_synth_TCN.csv", index=False)

# Augmentation with TTS GAN synthetic data

In [12]:
# Train on the TTS GAN synthetic series and evaluate on the real train/val splits.
# The path must end with "/" because it is concatenated directly with the file name.
results = train_synth(f"{dataset_path}synthetic/TTS_GAN/")
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:28, 28.92s/it]Global seed set to 0
2it [01:04, 32.71s/it]Global seed set to 0
3it [01:38, 33.39s/it]Global seed set to 0
4it [02:10, 32.88s/it]Global seed set to 0
5it [02:41, 32.23s/it]Global seed set to 0
6it [03:09, 30.70s/it]Global seed set to 0
7it [03:35, 29.30s/it]Global seed set to 0
8it [04:02, 28.34s/it]Global seed set to 0
9it [04:27, 27.54s/it]Global seed set to 0
10it [04:53, 27.05s/it]Global seed set to 0
11it [05:20, 26.83s/it]Global seed set to 0
12it [05:46, 26.55s/it]Global seed set to 0
13it [06:12, 26.37s/it]Global seed set to 0
14it [06:37, 26.21s/it]Global seed set to 0
15it [07:04, 26.43s/it]Global seed set to 0
16it [07:30, 26.32s/it]Global seed set to 0
17it [07:57, 26.29s/it]Global seed set to 0
18it [08:23, 26.41s/it]Global seed set to 0
19it [08:50, 26.38s/it]Global seed set to 0
20it [09:16, 26.37s/it]Global seed set to 0
21it [09:43, 26.55s/it]Global seed set to 0
22it [10:10, 26.68s/it]Global seed set to 0
23

[{'train': 0.1234881691634655, 'val': 0.11661145836114883},
 {'train': 0.0406411848962307, 'val': 0.05121273919939995},
 {'train': 0.007865304437776407, 'val': 0.0061461441218853},
 {'train': 0.010188820927093426, 'val': 0.010907528921961784},
 {'train': 0.04357287473976612, 'val': 0.04281071946024895},
 {'train': 0.02910853922367096, 'val': 0.027308590710163116},
 {'train': 0.02897317223250866, 'val': 0.03419254347681999},
 {'train': 0.017463923431932926, 'val': 0.015027855522930622},
 {'train': 0.009026645217090845, 'val': 0.006393409334123135},
 {'train': 0.008107946626842022, 'val': 0.006907070055603981},
 {'train': 0.05294864748915037, 'val': 0.038608504459261894},
 {'train': 0.03836228201786677, 'val': 0.02062815148383379},
 {'train': 0.03631164340509309, 'val': 0.03271069750189781},
 {'train': 0.011803480563685298, 'val': 0.006573209073394537},
 {'train': 0.01886085979640484, 'val': 0.010584883391857147},
 {'train': 0.04373375661671162, 'val': 0.026596336625516415},
 {'train': 0

In [13]:
# A forward slash is accepted on Windows and POSIX alike, so the file always lands
# inside the results/ directory (a backslash is a plain filename character on POSIX).
pd.DataFrame(results).to_csv("results/TTS_GAN_synth_TCN.csv", index=False)