In [1]:
import os

import numpy as np
import pandas as pd

from tqdm import tqdm

from utils.data import *
from utils.metrics import MAPE, WAPE, MAE
from utils.dl import *

In [2]:
# Root folder of the stock-market dataset. Later cells build paths by plain
# string concatenation (e.g. f"{dataset_path}synthetic/QuantGAN/"), so the
# trailing slash must be kept.
dataset_path = "data/huge_stock_market_dataset/"

In [3]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Windowing arguments forwarded to create_ts_dl
# (presumably look-back length, forecast length and window step -- TODO confirm).
lags, horizon, stride = 32, 8, 1

# Data-loader and split settings forwarded to create_ts_dl.
batch_size = 256
val_size, test_size = 0.0, 0.3
drop_last = False
features = 1

# Training settings.
epochs = 200
verbose = False

# Keyword arguments handed to the TCN constructor through Model.set_model.
model_params = dict(
    num_channels=[128] * 4,
    kernel_size=2,
    dropout=0.25,
    output_size=horizon,
    input_size=lags,
)

cuda:0


In [5]:
# Baseline: fit one TCN per selected real series and record its loss on the
# train and test loaders of that same series.
ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected.csv")

results = []
for time_series in tqdm(ts_iterator):
    # The "Close" column is passed both as the feature frame and as the target.
    train_dl, val_dl, test_dl, X_scaler, y_scaler = create_ts_dl(
        time_series[["Close"]],
        time_series["Close"],
        lags=lags,
        horizon=horizon,
        stride=stride,
        batch_size=batch_size,
        device=device,
        data_preprocess=("log_returns", "normalize"),
        val_size=val_size,
        test_size=test_size,
        drop_last=drop_last,
    )

    # A fresh model (seed 0) with its own AdamW optimiser for every series.
    model = Model(seed=0, device=device)
    model.set_model(TCN, **model_params)
    optim_params = {'params': model.model.parameters(), 'lr': 4e-4}
    model.set_optim(torch.optim.AdamW, **optim_params)
    model.set_criterion(MAE)

    model.train(train_dl, epochs=epochs, print_info=verbose, agg_loss="mean")
    results.append(
        {
            "train": model.eval(train_dl, agg_loss="mean"),
            "test": model.eval(test_dl, agg_loss="mean"),
        }
    )

    # Drop the per-series objects before asking CUDA to release cached memory.
    del model, train_dl, val_dl, test_dl
    torch.cuda.empty_cache()
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:04,  4.66s/it]Global seed set to 0
2it [00:10,  5.33s/it]Global seed set to 0
3it [00:16,  5.62s/it]Global seed set to 0
4it [00:20,  5.04s/it]Global seed set to 0
5it [00:24,  4.60s/it]Global seed set to 0
6it [00:34,  6.57s/it]Global seed set to 0
7it [00:42,  6.93s/it]Global seed set to 0
8it [00:50,  7.35s/it]Global seed set to 0
9it [01:01,  8.30s/it]Global seed set to 0
10it [01:09,  8.31s/it]Global seed set to 0
11it [01:25, 10.64s/it]Global seed set to 0
12it [01:42, 12.54s/it]Global seed set to 0
13it [01:58, 13.76s/it]Global seed set to 0
14it [02:13, 13.91s/it]Global seed set to 0
15it [02:29, 14.54s/it]Global seed set to 0
16it [02:47, 15.72s/it]Global seed set to 0
17it [03:06, 16.82s/it]Global seed set to 0
18it [03:27, 17.90s/it]Global seed set to 0
19it [03:46, 18.23s/it]Global seed set to 0
20it [04:04, 18.37s/it]Global seed set to 0
21it [05:09, 32.28s/it]Global seed set to 0
22it [06:26, 45.72s/it]Global seed set to 0
23

[{'train': 0.19787601940333843, 'test': 0.5082648396492004},
 {'train': 0.3392236332098643, 'test': 0.7079529017210007},
 {'train': 0.3041059697667758, 'test': 0.6652947664260864},
 {'train': 0.30083201825618744, 'test': 0.7360950112342834},
 {'train': 0.2789757028222084, 'test': 0.5395439267158508},
 {'train': 0.31913799345493316, 'test': 0.8505125641822815},
 {'train': 0.3152100183069706, 'test': 0.9676997065544128},
 {'train': 0.29224710166454315, 'test': 0.574544370174408},
 {'train': 0.3443475902080536, 'test': 0.6906651059786478},
 {'train': 0.3177557587623596, 'test': 0.8755089044570923},
 {'train': 0.37008901685476303, 'test': 0.6570601090788841},
 {'train': 0.3068774398416281, 'test': 0.46979328989982605},
 {'train': 0.30915280524641275, 'test': 0.5998988822102547},
 {'train': 0.32610432377883364, 'test': 0.524766594171524},
 {'train': 0.36166948452591896, 'test': 0.5083873048424721},
 {'train': 0.367029650343789, 'test': 0.42380300909280777},
 {'train': 0.3443550235695309, 't

In [6]:
# Persist the baseline scores. The path is built with os.path.join instead of a
# hard-coded backslash so it resolves to results/<file> on every OS, and the
# output folder is created when it does not exist yet.
os.makedirs("results", exist_ok=True)
pd.DataFrame(results).to_csv(os.path.join("results", f"pure_TCN_h{horizon}.csv"), index=False)

# Augmentation with QuantGAN synthetic data

In [5]:
class CombinedDataLoader:
    """Present several data loaders as one iterable that visits them back to back."""

    def __init__(self, *dls):
        # Kept as the tuple of loaders in the order they were passed.
        self.dls = dls

    def __len__(self):
        """Total number of items (batches) across all wrapped loaders."""
        return sum(len(loader) for loader in self.dls)

    def __iter__(self):
        """Yield every item of the first loader, then the second, and so on."""
        for loader in self.dls:
            yield from loader


def train_synth(synthetic_path, epochs=2, use_real_data=False):
    """Train one TCN per selected series on synthetic data and score it on the real series.

    For every series yielded by ``get_hsm_dataset`` the array stored at
    ``f"{synthetic_path}selected{ts_index}.npy"`` is loaded, passed through a
    freshly fitted ``DimUniversalStandardScaler`` and turned into one data
    loader per entry along its first axis. The same scaler instance is handed
    to ``create_ts_dl`` when the real series is windowed. Windowing, batching
    and model settings are read from the notebook-level configuration
    (``lags``, ``horizon``, ``stride``, ``batch_size``, ``val_size``,
    ``test_size``, ``drop_last``, ``device``, ``model_params``, ``verbose``).

    Args:
        synthetic_path: String prefix of the synthetic ``.npy`` files. It is
            concatenated as-is, so a directory must end with a path separator.
        epochs: Number of training epochs. Defaults to 2, the value that used
            to be hard-coded inside the function.
        use_real_data: When True, the real training loader is placed in front
            of the synthetic loaders so the model is trained on both. Defaults
            to False (synthetic data only), which is the previous behaviour.

    Returns:
        A list with one dict per series, in iteration order. Each dict has the
        keys ``"train"`` and ``"test"`` holding the value returned by
        ``model.eval`` on the real train and test loaders.
    """
    ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected.csv")

    results = []
    for ts_index, time_series in tqdm(enumerate(ts_iterator)):
        synth_time_series = np.load(f"{synthetic_path}selected{ts_index}.npy")
        scaler = DimUniversalStandardScaler()
        synth_time_series = scaler.fit_transform(synth_time_series)

        # One loader per synthetic sample. The data is already scaled above, so
        # no preprocessing is requested and nothing is held out. Only the first
        # element of the returned tuple (the training loader) is kept; a
        # comprehension is used so no loop variable keeps a loader alive.
        synth_dls = [
            create_ts_dl(synth_time_series[i].reshape(-1, 1), synth_time_series[i].flatten(),
                         lags=lags, horizon=horizon, stride=stride,
                         batch_size=batch_size, device=device, data_preprocess=(None,),
                         val_size=0, test_size=0, drop_last=drop_last)[0]
            for i in range(synth_time_series.shape[0])
        ]

        train_dl, _, test_dl, *_ = create_ts_dl(time_series[["Close"]], time_series["Close"],
                                                lags=lags, horizon=horizon, stride=stride,
                                                data_preprocess=("log_returns", "normalize"), device=device,
                                                val_size=val_size, test_size=test_size, batch_size=batch_size,
                                                drop_last=drop_last, scaler=scaler)

        model = Model(seed=0, device=device)
        model.set_model(TCN, **model_params)
        optim_params = {'params': model.model.parameters(), 'lr': 4e-4}
        model.set_optim(torch.optim.AdamW, **optim_params)
        model.set_criterion(MAE)

        # Synthetic-only by default; optionally prepend the real training data.
        loaders = [train_dl, *synth_dls] if use_real_data else synth_dls
        cdl = CombinedDataLoader(*loaders)
        model.train(cdl, epochs=epochs, print_info=verbose, agg_loss="mean")
        results.append({"train": model.eval(train_dl, agg_loss="mean"), "test": model.eval(test_dl, agg_loss="mean")})

        # Drop every reference to the loaders (including the synthetic ones)
        # before empty_cache(); memory that is still referenced cannot be
        # returned by it.
        del model, train_dl, test_dl, cdl, loaders, synth_dls
        torch.cuda.empty_cache()
    return results

In [8]:
# Train on the QuantGAN-generated series only; the reported losses are
# computed on the real train/test loaders inside train_synth.
results = train_synth(f"{dataset_path}synthetic/QuantGAN/")
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:27, 27.21s/it]Global seed set to 0
2it [01:00, 31.04s/it]Global seed set to 0
3it [01:31, 30.93s/it]Global seed set to 0
4it [02:06, 32.40s/it]Global seed set to 0
5it [02:40, 32.92s/it]Global seed set to 0
6it [03:09, 31.79s/it]Global seed set to 0
7it [03:42, 32.09s/it]Global seed set to 0
8it [04:13, 31.85s/it]Global seed set to 0
9it [04:45, 31.78s/it]Global seed set to 0
10it [05:17, 31.84s/it]Global seed set to 0
11it [05:41, 29.46s/it]Global seed set to 0
12it [06:14, 30.46s/it]Global seed set to 0
13it [06:46, 30.88s/it]Global seed set to 0
14it [07:18, 31.31s/it]Global seed set to 0
15it [07:49, 31.32s/it]Global seed set to 0
16it [08:21, 31.44s/it]Global seed set to 0
17it [08:52, 31.42s/it]Global seed set to 0
18it [09:26, 32.03s/it]Global seed set to 0
19it [09:58, 32.23s/it]Global seed set to 0
20it [10:29, 31.66s/it]Global seed set to 0
21it [10:51, 28.91s/it]Global seed set to 0
22it [11:27, 31.06s/it]Global seed set to 0
23

[{'train': 0.5435892790555954, 'test': 0.54209965467453},
 {'train': 0.8270176847775778, 'test': 0.9425464868545532},
 {'train': 0.7041410207748413, 'test': 0.5689220428466797},
 {'train': 0.7249805927276611, 'test': 0.6068865656852722},
 {'train': 0.6058879792690277, 'test': 0.45383521914482117},
 {'train': 0.7171173453330993, 'test': 0.7689958214759827},
 {'train': 0.7647099792957306, 'test': 1.0203015506267548},
 {'train': 0.6623397022485733, 'test': 0.5429511666297913},
 {'train': 0.7571945428848267, 'test': 0.6169847647349039},
 {'train': 0.7418684959411621, 'test': 0.7264038622379303},
 {'train': 0.5365933477878571, 'test': 0.506879135966301},
 {'train': 0.6224485337734222, 'test': 0.4362354129552841},
 {'train': 0.6309031173586845, 'test': 0.5949422046542168},
 {'train': 0.6457414031028748, 'test': 0.43510739008585614},
 {'train': 0.6705733276903629, 'test': 0.44167160987854004},
 {'train': 0.6341570748223199, 'test': 0.39226602762937546},
 {'train': 0.6508426136440701, 'test': 

In [9]:
# Persist the QuantGAN scores. The path is built with os.path.join instead of a
# hard-coded backslash so it resolves to results/<file> on every OS, and the
# output folder is created when it does not exist yet.
os.makedirs("results", exist_ok=True)
pd.DataFrame(results).to_csv(os.path.join("results", f"QuantGAN_synth_TCN_h{horizon}.csv"), index=False)

# Augmentation with FourierFlow synthetic data

In [6]:
# Train on the FourierFlow-generated series only; the reported losses are
# computed on the real train/test loaders inside train_synth.
results = train_synth(f"{dataset_path}synthetic/FourierFlow/")
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:33, 33.99s/it]Global seed set to 0
2it [00:56, 27.09s/it]Global seed set to 0
3it [01:22, 26.56s/it]Global seed set to 0
4it [01:39, 22.86s/it]Global seed set to 0
5it [02:02, 22.94s/it]Global seed set to 0
6it [02:21, 21.72s/it]Global seed set to 0
7it [02:42, 21.28s/it]Global seed set to 0
8it [03:00, 20.39s/it]Global seed set to 0
9it [03:18, 19.70s/it]Global seed set to 0
10it [03:36, 19.20s/it]Global seed set to 0
11it [03:55, 19.16s/it]Global seed set to 0
12it [04:14, 19.05s/it]Global seed set to 0
13it [04:34, 19.14s/it]Global seed set to 0
14it [04:53, 19.26s/it]Global seed set to 0
15it [05:12, 18.99s/it]Global seed set to 0
16it [05:30, 18.96s/it]Global seed set to 0
17it [05:49, 18.71s/it]Global seed set to 0
18it [06:07, 18.72s/it]Global seed set to 0
19it [06:26, 18.72s/it]Global seed set to 0
20it [06:45, 18.69s/it]Global seed set to 0
21it [07:04, 18.88s/it]Global seed set to 0
22it [07:23, 18.96s/it]Global seed set to 0
23

[{'train': 0.32322874665260315, 'test': 0.4699210226535797},
 {'train': 0.3319132129351298, 'test': 0.7136789262294769},
 {'train': 0.34036607543627423, 'test': 0.5904576182365417},
 {'train': 0.22319162636995316, 'test': 0.7450160384178162},
 {'train': 0.20290099829435349, 'test': 0.5193951725959778},
 {'train': 0.40305753946304324, 'test': 0.7497147023677826},
 {'train': 0.3647677153348923, 'test': 0.8842186331748962},
 {'train': 0.32984166592359543, 'test': 0.5407803952693939},
 {'train': 0.41892513632774353, 'test': 0.5965081652005514},
 {'train': 0.38653023540973663, 'test': 0.7412186563014984},
 {'train': 0.43802810087800026, 'test': 0.6042560189962387},
 {'train': 0.4080450441688299, 'test': 0.42729654908180237},
 {'train': 0.4139963537454605, 'test': 0.5397778823971748},
 {'train': 0.4531453847885132, 'test': 0.4462505380312602},
 {'train': 0.46203041076660156, 'test': 0.4364969730377197},
 {'train': 0.48413871063126457, 'test': 0.3892286717891693},
 {'train': 0.481030725770526

In [7]:
# Persist the FourierFlow scores. The path is built with os.path.join instead
# of a hard-coded backslash so it resolves to results/<file> on every OS, and
# the output folder is created when it does not exist yet.
os.makedirs("results", exist_ok=True)
pd.DataFrame(results).to_csv(os.path.join("results", f"FourierFlow_synth_TCN_h{horizon}.csv"), index=False)

# Augmentation with RealNVP synthetic data

In [8]:
# Train on the RealNVP-generated series only; the reported losses are
# computed on the real train/test loaders inside train_synth.
results = train_synth(f"{dataset_path}synthetic/RealNVP/")
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:29, 29.09s/it]Global seed set to 0
2it [00:50, 24.59s/it]Global seed set to 0
3it [01:13, 23.86s/it]Global seed set to 0
4it [01:32, 21.87s/it]Global seed set to 0
5it [01:53, 21.78s/it]Global seed set to 0
6it [02:13, 21.11s/it]Global seed set to 0
7it [02:34, 20.98s/it]Global seed set to 0
8it [02:54, 20.58s/it]Global seed set to 0
9it [03:13, 20.15s/it]Global seed set to 0
10it [03:31, 19.64s/it]Global seed set to 0
11it [03:51, 19.76s/it]Global seed set to 0
12it [04:11, 19.76s/it]Global seed set to 0
13it [04:31, 19.92s/it]Global seed set to 0
14it [04:52, 20.09s/it]Global seed set to 0
15it [05:12, 19.94s/it]Global seed set to 0
16it [05:32, 20.09s/it]Global seed set to 0
17it [05:52, 20.03s/it]Global seed set to 0
18it [06:12, 20.05s/it]Global seed set to 0
19it [06:32, 20.05s/it]Global seed set to 0
20it [06:51, 19.78s/it]Global seed set to 0
21it [07:10, 19.54s/it]Global seed set to 0
22it [07:29, 19.40s/it]Global seed set to 0
23

[{'train': 0.16566815972328186, 'test': 0.558593213558197},
 {'train': 0.1895222912232081, 'test': 0.8112404942512512},
 {'train': 0.24506431818008423, 'test': 0.6935093402862549},
 {'train': 0.15342990309000015, 'test': 0.6970542669296265},
 {'train': 0.12388695031404495, 'test': 0.48503953218460083},
 {'train': 0.2757977694272995, 'test': 0.8475994765758514},
 {'train': 0.24680794402956963, 'test': 0.9749784469604492},
 {'train': 0.2173885740339756, 'test': 0.5799757540225983},
 {'train': 0.30886733531951904, 'test': 0.6607522765795389},
 {'train': 0.2517354302108288, 'test': 0.7541792094707489},
 {'train': 0.38291193172335625, 'test': 0.6281066685914993},
 {'train': 0.32825440913438797, 'test': 0.4598536342382431},
 {'train': 0.34164662659168243, 'test': 0.5808025002479553},
 {'train': 0.3508010187319347, 'test': 0.4910944600900014},
 {'train': 0.3867180272936821, 'test': 0.490529365837574},
 {'train': 0.42101248105367023, 'test': 0.4072386547923088},
 {'train': 0.3781363070011139, 

In [9]:
# Persist the RealNVP scores. The path is built with os.path.join instead of a
# hard-coded backslash so it resolves to results/<file> on every OS, and the
# output folder is created when it does not exist yet.
os.makedirs("results", exist_ok=True)
pd.DataFrame(results).to_csv(os.path.join("results", f"RealNVP_synth_TCN_h{horizon}.csv"), index=False)

# Augmentation with TTS GAN synthetic data

In [10]:
# Train on the TTS GAN-generated series only; the reported losses are
# computed on the real train/test loaders inside train_synth.
results = train_synth(f"{dataset_path}synthetic/TTS_GAN_standard/")
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:30, 30.12s/it]Global seed set to 0
2it [00:59, 29.45s/it]Global seed set to 0
3it [01:27, 28.75s/it]Global seed set to 0
4it [01:55, 28.67s/it]Global seed set to 0
5it [02:25, 29.16s/it]Global seed set to 0
6it [02:54, 28.98s/it]Global seed set to 0
7it [03:22, 28.68s/it]Global seed set to 0
8it [03:50, 28.55s/it]Global seed set to 0
9it [04:18, 28.49s/it]Global seed set to 0
10it [04:47, 28.37s/it]Global seed set to 0
11it [05:16, 28.64s/it]Global seed set to 0
12it [05:46, 29.09s/it]Global seed set to 0
13it [06:23, 31.44s/it]Global seed set to 0
14it [06:53, 31.23s/it]Global seed set to 0
15it [07:22, 30.55s/it]Global seed set to 0
16it [07:52, 30.15s/it]Global seed set to 0
17it [08:22, 30.16s/it]Global seed set to 0
18it [08:55, 31.05s/it]Global seed set to 0
19it [09:29, 31.98s/it]Global seed set to 0
20it [10:00, 31.78s/it]Global seed set to 0
21it [10:35, 32.50s/it]Global seed set to 0
22it [11:05, 31.82s/it]Global seed set to 0
23

[{'train': 0.3462240993976593, 'test': 0.34830158948898315},
 {'train': 0.4757441778977712, 'test': 0.5534115731716156},
 {'train': 0.1426496903101603, 'test': 0.14766333997249603},
 {'train': 0.10613445192575455, 'test': 0.08686386793851852},
 {'train': 0.2390531599521637, 'test': 0.2376752346754074},
 {'train': 0.782914650440216, 'test': 0.8427335321903229},
 {'train': 0.5418009459972382, 'test': 0.738556832075119},
 {'train': 0.7479911893606186, 'test': 0.5918646454811096},
 {'train': 0.22549615502357484, 'test': 0.18310852845509848},
 {'train': 0.22109955921769142, 'test': 0.2150406464934349},
 {'train': 1.0576811954379082, 'test': 1.0008201152086258},
 {'train': 0.9266853705048561, 'test': 0.676562488079071},
 {'train': 0.7268260940909386, 'test': 0.6688977181911469},
 {'train': 0.5281490385532379, 'test': 0.35435275236765545},
 {'train': 0.8313932195305824, 'test': 0.549739196896553},
 {'train': 3.2767683929867215, 'test': 3.287171244621277},
 {'train': 0.7929224703047011, 'test'

In [11]:
# Persist the TTS GAN scores. The path is built with os.path.join instead of a
# hard-coded backslash so it resolves to results/<file> on every OS, and the
# output folder is created when it does not exist yet.
os.makedirs("results", exist_ok=True)
pd.DataFrame(results).to_csv(os.path.join("results", f"TTS_GAN_synth_TCN_h{horizon}.csv"), index=False)