In [1]:
import numpy as np
import pandas as pd

from tqdm import tqdm

from utils.data import *
from utils.metrics import MAPE, WAPE, MAE
from utils.dl import *

In [2]:
# Root folder of the dataset. Other cells build paths from it with f-strings such as
# f"{dataset_path}synthetic/QuantGAN/", so the trailing slash must be kept.
dataset_path = "data/huge_stock_market_dataset/"

In [3]:
# Compute device: the first CUDA GPU when one is usable, the CPU otherwise.
# NOTE(review): `torch` is not imported explicitly in this notebook; it is presumably
# brought into scope by one of the `utils` star imports -- confirm.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

# Windowing arguments forwarded to create_ts_dl (`lags` and `horizon` also size the model below).
lags, horizon, stride = 32, 8, 1

# Batching / split arguments forwarded to create_ts_dl.
batch_size = 256
val_size, test_size = 0.15, 0.0
drop_last = False

features = 1  # not referenced by the cells visible in this part of the notebook

# Training settings.
epochs = 200     # passed as `epochs` to Model.train in the baseline loop
verbose = False  # passed as `print_info` to Model.train

# Keyword arguments handed to Model.set_model together with the TCN class.
model_params = dict(
    num_channels=[128] * 4,
    kernel_size=2,
    dropout=0.25,
    output_size=horizon,
    input_size=lags,
)

cuda:0


In [4]:
# Baseline: fit one TCN per real series and record its train / validation loss.
ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected.csv")

results = []
for time_series in tqdm(ts_iterator):
    # The "Close" column is passed twice: once as a one-column frame, once as a series.
    train_dl, val_dl, test_dl, X_scaler, y_scaler = create_ts_dl(
        time_series[["Close"]],
        time_series["Close"],
        lags=lags,
        horizon=horizon,
        stride=stride,
        batch_size=batch_size,
        device=device,
        data_preprocess=("log_returns", "normalize"),
        val_size=val_size,
        test_size=test_size,
        drop_last=drop_last,
    )

    # A fresh model, optimiser and criterion are built for every series.
    model = Model(seed=0, device=device)
    model.set_model(TCN, **model_params)
    optim_params = dict(params=model.model.parameters(), lr=4e-4)
    model.set_optim(torch.optim.AdamW, **optim_params)
    model.set_criterion(MAE)

    model.train(train_dl, epochs=epochs, print_info=verbose, agg_loss="mean")
    # Evaluate on the training loader first, then on the validation loader.
    results.append(dict(
        train=model.eval(train_dl, agg_loss="mean"),
        val=model.eval(val_dl, agg_loss="mean"),
    ))

    # Drop the per-series objects before asking PyTorch to release its cached GPU memory.
    del model, train_dl, val_dl, test_dl
    torch.cuda.empty_cache()

0it [00:00, ?it/s]Global seed set to 0
1it [00:11, 11.18s/it]Global seed set to 0
2it [00:17,  8.08s/it]Global seed set to 0
3it [00:23,  7.10s/it]Global seed set to 0
4it [00:29,  6.70s/it]Global seed set to 0
5it [00:33,  5.76s/it]Global seed set to 0
6it [00:45,  7.97s/it]Global seed set to 0
7it [00:55,  8.57s/it]Global seed set to 0
8it [01:05,  9.20s/it]Global seed set to 0
9it [01:18, 10.43s/it]Global seed set to 0
10it [01:29, 10.53s/it]Global seed set to 0
11it [01:48, 13.18s/it]Global seed set to 0
12it [02:07, 14.84s/it]Global seed set to 0
13it [02:25, 15.67s/it]Global seed set to 0
14it [02:40, 15.70s/it]Global seed set to 0
15it [03:00, 16.84s/it]Global seed set to 0
16it [03:20, 17.70s/it]Global seed set to 0
17it [03:41, 18.92s/it]Global seed set to 0
18it [04:03, 19.76s/it]Global seed set to 0
19it [04:24, 20.25s/it]Global seed set to 0
20it [04:46, 20.66s/it]Global seed set to 0
21it [06:01, 36.85s/it]Global seed set to 0
22it [07:19, 49.44s/it]Global seed set to 0
23

In [5]:
# Display the per-series {"train", "val"} losses collected by the baseline loop.
results

[{'train': 0.23674218356609344, 'val': 0.4811241626739502},
 {'train': 0.2864615321159363, 'val': 0.7573235034942627},
 {'train': 0.3238765796025594, 'val': 0.5413807034492493},
 {'train': 0.28117311497529346, 'val': 0.8478381037712097},
 {'train': 0.3009508475661278, 'val': 0.6109363436698914},
 {'train': 0.3556194206078847, 'val': 0.649357259273529},
 {'train': 0.45028582215309143, 'val': 0.9531058073043823},
 {'train': 0.29769809544086456, 'val': 0.6143671274185181},
 {'train': 0.3617719958225886, 'val': 0.5915700793266296},
 {'train': 0.33793150782585146, 'val': 0.7651367783546448},
 {'train': 0.38597579797108966, 'val': 0.5502511113882065},
 {'train': 0.345028065972858, 'val': 0.38234348595142365},
 {'train': 0.34317925572395325, 'val': 0.595600426197052},
 {'train': 0.36123879812657833, 'val': 0.4556068927049637},
 {'train': 0.380402010679245, 'val': 0.43131493031978607},
 {'train': 0.34126788973808286, 'val': 0.39676256477832794},
 {'train': 0.3816204314882105, 'val': 0.48927529

In [6]:
# A forward slash keeps the file inside results/ on every OS; a backslash is a path
# separator only on Windows. The results/ directory has to exist already.
pd.DataFrame(results).to_csv("results/pure_TCN.csv", index=False)

# Augmentation with QuantGAN synthetic data

In [4]:
class CombinedDataLoader:
    """Present several data loaders as a single sequential stream.

    Iteration exhausts the wrapped loaders one after another, in the order they
    were passed to the constructor; items are never interleaved across loaders.
    """

    def __init__(self, *dls):
        # Stored exactly as received: a tuple in call order.
        self.dls = dls

    def __len__(self):
        """Return the sum of ``len(dl)`` over all wrapped loaders."""
        total = 0
        for loader in self.dls:
            total += len(loader)
        return total

    def __iter__(self):
        """Yield every item of each wrapped loader, loader by loader."""
        for loader in self.dls:
            yield from loader


def train_synth(synthetic_path, epochs=2, include_real=False):
    """Train one TCN per real series on its synthetic data and score it on the real data.

    For the ``ts_index``-th series yielded by ``get_hsm_dataset`` the file
    ``selected{ts_index}.npy`` is loaded from ``synthetic_path``, transformed with a
    freshly fitted ``DimUniversalStandardScaler`` and turned into one data loader per
    entry along its first axis. A new model is trained on those loaders and then
    evaluated on the train / validation loaders built from the real ``"Close"`` column.

    Relies on the notebook-level settings ``dataset_path``, ``lags``, ``horizon``,
    ``stride``, ``batch_size``, ``device``, ``drop_last``, ``val_size``, ``test_size``,
    ``verbose`` and ``model_params``.

    Args:
        synthetic_path: Path prefix of the ``selected{i}.npy`` files. It is concatenated
            as-is, so a directory must be given with its trailing separator.
        epochs: Number of epochs passed to ``model.train`` for every series. Defaults
            to 2, independent of the notebook-level ``epochs`` setting.
        include_real: When True the real training loader is iterated ahead of the
            synthetic loaders; by default only synthetic data is used for training.

    Returns:
        list[dict]: one ``{"train": ..., "val": ...}`` entry per series, holding the
        values returned by ``model.eval`` on the real train and validation loaders.
    """
    ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected.csv")

    results = []
    for ts_index, time_series in tqdm(enumerate(ts_iterator)):
        synth_time_series = np.load(f"{synthetic_path}selected{ts_index}.npy")
        # The scaler is fitted on the synthetic data and handed to create_ts_dl for the
        # real series below.
        scaler = DimUniversalStandardScaler()
        synth_time_series = scaler.fit_transform(synth_time_series)

        # One training loader per synthetic series. Only the first returned element is
        # kept so that no other returned object stays referenced.
        synth_dls = [
            create_ts_dl(synth_time_series[i].reshape(-1, 1), synth_time_series[i].flatten(),
                         lags=lags, horizon=horizon, stride=stride,
                         batch_size=batch_size, device=device, data_preprocess=(None,),
                         val_size=0, test_size=0, drop_last=drop_last)[0]
            for i in range(synth_time_series.shape[0])
        ]

        train_dl, val_dl, *unused = create_ts_dl(time_series[["Close"]], time_series["Close"],
                                                 lags=lags, horizon=horizon, stride=stride,
                                                 data_preprocess=("log_returns", "normalize"), device=device,
                                                 val_size=val_size, test_size=test_size,
                                                 batch_size=batch_size, drop_last=drop_last, scaler=scaler)

        model = Model(seed=0, device=device)
        model.set_model(TCN, **model_params)
        model.set_optim(torch.optim.AdamW, params=model.model.parameters(), lr=4e-4)
        model.set_criterion(MAE)

        if include_real:
            cdl = CombinedDataLoader(train_dl, *synth_dls)
        else:
            cdl = CombinedDataLoader(*synth_dls)
        model.train(cdl, epochs=epochs, print_info=verbose, agg_loss="mean")
        results.append({"train": model.eval(train_dl, agg_loss="mean"), "val": model.eval(val_dl, agg_loss="mean")})

        # Drop every reference to the model and loaders (synthetic ones included) before
        # emptying the CUDA cache; memory that is still referenced cannot be released.
        del model, cdl, synth_dls, train_dl, val_dl, unused
        torch.cuda.empty_cache()
    return results

In [5]:
# Train one model per series on its QuantGAN synthetic data (see train_synth) and
# display the losses it reports on the real train / validation loaders.
results = train_synth(f"{dataset_path}synthetic/QuantGAN/")
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:37, 37.81s/it]Global seed set to 0
2it [01:11, 35.44s/it]Global seed set to 0
3it [01:43, 33.85s/it]Global seed set to 0
4it [02:14, 32.53s/it]Global seed set to 0
5it [02:43, 31.32s/it]Global seed set to 0
6it [03:12, 30.65s/it]Global seed set to 0
7it [03:42, 30.46s/it]Global seed set to 0
8it [04:12, 30.41s/it]Global seed set to 0
9it [04:43, 30.47s/it]Global seed set to 0
10it [05:14, 30.61s/it]Global seed set to 0
11it [05:39, 28.89s/it]Global seed set to 0
12it [06:10, 29.52s/it]Global seed set to 0
13it [06:46, 31.56s/it]Global seed set to 0
14it [07:20, 32.34s/it]Global seed set to 0
15it [07:58, 33.87s/it]Global seed set to 0
16it [08:31, 33.69s/it]Global seed set to 0
17it [09:05, 33.78s/it]Global seed set to 0
18it [09:40, 34.10s/it]Global seed set to 0
19it [10:18, 35.34s/it]Global seed set to 0
20it [10:51, 34.76s/it]Global seed set to 0
21it [11:23, 33.92s/it]Global seed set to 0
22it [11:58, 34.21s/it]Global seed set to 0
23

[{'train': 0.5293527543544769, 'val': 0.43260064721107483},
 {'train': 0.7554612557093302, 'val': 0.9956327080726624},
 {'train': 0.7270463903745016, 'val': 0.5240476727485657},
 {'train': 0.6807130972544352, 'val': 0.7259753346443176},
 {'train': 0.622183084487915, 'val': 0.48031720519065857},
 {'train': 0.6886808276176453, 'val': 0.6457710266113281},
 {'train': 0.9043120741844177, 'val': 1.0736026763916016},
 {'train': 0.6925832748413085, 'val': 0.602914035320282},
 {'train': 0.7732862035433451, 'val': 0.557648628950119},
 {'train': 0.741913092136383, 'val': 0.6322433352470398},
 {'train': 0.5989659163686964, 'val': 0.43992356956005096},
 {'train': 0.7045180598894755, 'val': 0.37830910086631775},
 {'train': 0.6547368698649936, 'val': 0.5777660459280014},
 {'train': 0.731889121234417, 'val': 0.3968053460121155},
 {'train': 0.67260362803936, 'val': 0.3693453073501587},
 {'train': 0.6287688910961151, 'val': 0.38821177184581757},
 {'train': 0.6732311140407216, 'val': 0.47147688269615173}

In [6]:
# A forward slash keeps the file inside results/ on every OS; a backslash is a path
# separator only on Windows. The results/ directory has to exist already.
pd.DataFrame(results).to_csv("results/QuantGAN_synth_TCN.csv", index=False)

# Augmentation with FourierFlow synthetic data

In [7]:
# Train one model per series on its FourierFlow synthetic data (see train_synth) and
# display the losses it reports on the real train / validation loaders.
results = train_synth(f"{dataset_path}synthetic/FourierFlow/")
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:25, 25.26s/it]Global seed set to 0
2it [00:43, 21.21s/it]Global seed set to 0
3it [01:01, 19.76s/it]Global seed set to 0
4it [01:21, 19.90s/it]Global seed set to 0
5it [01:40, 19.49s/it]Global seed set to 0
6it [02:01, 19.97s/it]Global seed set to 0
7it [02:23, 20.60s/it]Global seed set to 0
8it [02:44, 20.85s/it]Global seed set to 0
9it [03:03, 20.30s/it]Global seed set to 0
10it [03:23, 20.16s/it]Global seed set to 0
11it [03:43, 20.02s/it]Global seed set to 0
12it [04:02, 19.65s/it]Global seed set to 0
13it [04:21, 19.54s/it]Global seed set to 0
14it [04:41, 19.63s/it]Global seed set to 0
15it [05:01, 19.71s/it]Global seed set to 0
16it [05:20, 19.50s/it]Global seed set to 0
17it [05:40, 19.60s/it]Global seed set to 0
18it [05:59, 19.59s/it]Global seed set to 0
19it [06:19, 19.58s/it]Global seed set to 0
20it [06:38, 19.45s/it]Global seed set to 0
21it [06:58, 19.64s/it]Global seed set to 0
22it [07:18, 19.72s/it]Global seed set to 0
23

[{'train': 0.16681548953056335, 'val': 0.4957714080810547},
 {'train': 0.291112740834554, 'val': 0.7899066209793091},
 {'train': 0.33505422870318097, 'val': 0.4277529716491699},
 {'train': 0.3135557174682617, 'val': 0.6883183717727661},
 {'train': 0.25667545199394226, 'val': 0.5605508089065552},
 {'train': 0.46882321933905285, 'val': 0.5924941897392273},
 {'train': 0.640278035402298, 'val': 0.9052080512046814},
 {'train': 0.37111603617668154, 'val': 0.542952835559845},
 {'train': 0.5142612854639689, 'val': 0.4652446359395981},
 {'train': 0.47312082052230836, 'val': 0.5541983842849731},
 {'train': 0.5057948629061381, 'val': 0.5137885063886642},
 {'train': 0.45314862661891514, 'val': 0.3324524313211441},
 {'train': 0.4481763044993083, 'val': 0.5476046055555344},
 {'train': 0.488831277936697, 'val': 0.36579301953315735},
 {'train': 0.5050724267959594, 'val': 0.34902144968509674},
 {'train': 0.4481771469116211, 'val': 0.36557671427726746},
 {'train': 0.4972009062767029, 'val': 0.4220722764

In [8]:
# A forward slash keeps the file inside results/ on every OS; a backslash is a path
# separator only on Windows. The results/ directory has to exist already.
pd.DataFrame(results).to_csv("results/FourierFlow_synth_TCN.csv", index=False)

# Augmentation with RealNVP synthetic data

In [9]:
# Train one model per series on its RealNVP synthetic data (see train_synth) and
# display the losses it reports on the real train / validation loaders.
results = train_synth(f"{dataset_path}synthetic/RealNVP/")
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:25, 25.98s/it]Global seed set to 0
2it [00:44, 21.78s/it]Global seed set to 0
3it [01:03, 20.58s/it]Global seed set to 0
4it [01:24, 20.74s/it]Global seed set to 0
5it [01:43, 20.06s/it]Global seed set to 0
6it [02:03, 19.93s/it]Global seed set to 0
7it [02:24, 20.34s/it]Global seed set to 0
8it [02:44, 20.10s/it]Global seed set to 0
9it [03:02, 19.58s/it]Global seed set to 0
10it [03:21, 19.40s/it]Global seed set to 0
11it [03:41, 19.48s/it]Global seed set to 0
12it [04:00, 19.51s/it]Global seed set to 0
13it [04:19, 19.28s/it]Global seed set to 0
14it [04:38, 19.19s/it]Global seed set to 0
15it [04:58, 19.30s/it]Global seed set to 0
16it [05:18, 19.65s/it]Global seed set to 0
17it [05:39, 19.86s/it]Global seed set to 0
18it [05:59, 19.97s/it]Global seed set to 0
19it [06:18, 19.62s/it]Global seed set to 0
20it [06:37, 19.49s/it]Global seed set to 0
21it [06:57, 19.74s/it]Global seed set to 0
22it [07:17, 19.82s/it]Global seed set to 0
23

[{'train': 0.08122960105538368, 'val': 0.5808045864105225},
 {'train': 0.18102737267812094, 'val': 0.7948945760726929},
 {'train': 0.19302151103814444, 'val': 0.4397704005241394},
 {'train': 0.051527440547943115, 'val': 0.1737699657678604},
 {'train': 0.14469077810645103, 'val': 0.5781511664390564},
 {'train': 0.33037296930948895, 'val': 0.6563948392868042},
 {'train': 0.5946580827236175, 'val': 0.994230329990387},
 {'train': 0.2417995512485504, 'val': 0.5914024710655212},
 {'train': 0.3510037610928218, 'val': 0.5713746249675751},
 {'train': 0.28789877593517305, 'val': 0.6971303224563599},
 {'train': 0.4203063448270162, 'val': 0.5339581519365311},
 {'train': 0.394365582201216, 'val': 0.36583690345287323},
 {'train': 0.38211973508199054, 'val': 0.5643948167562485},
 {'train': 0.38391855359077454, 'val': 0.42212314903736115},
 {'train': 0.44115384817123415, 'val': 0.3940756618976593},
 {'train': 0.3957314968109131, 'val': 0.38275134563446045},
 {'train': 0.4479198537089608, 'val': 0.4575

In [10]:
# A forward slash keeps the file inside results/ on every OS; a backslash is a path
# separator only on Windows. The results/ directory has to exist already.
pd.DataFrame(results).to_csv("results/RealNVP_synth_TCN.csv", index=False)

# Augmentation with TTS GAN synthetic data

In [11]:
# Train one model per series on its TTS GAN synthetic data (see train_synth) and
# display the losses it reports on the real train / validation loaders.
results = train_synth(f"{dataset_path}synthetic/TTS_GAN_standard/")
results

0it [00:00, ?it/s]Global seed set to 0
1it [00:27, 27.23s/it]Global seed set to 0
2it [00:54, 27.43s/it]Global seed set to 0
3it [01:21, 27.31s/it]Global seed set to 0
4it [01:49, 27.26s/it]Global seed set to 0
5it [02:16, 27.33s/it]Global seed set to 0
6it [02:43, 27.28s/it]Global seed set to 0
7it [03:14, 28.36s/it]Global seed set to 0
8it [03:42, 28.31s/it]Global seed set to 0
9it [04:10, 28.19s/it]Global seed set to 0
10it [04:39, 28.31s/it]Global seed set to 0
11it [05:08, 28.71s/it]Global seed set to 0
12it [05:36, 28.35s/it]Global seed set to 0
13it [06:03, 28.16s/it]Global seed set to 0
14it [06:31, 28.10s/it]Global seed set to 0
15it [06:59, 27.93s/it]Global seed set to 0
16it [07:27, 28.05s/it]Global seed set to 0
17it [07:55, 27.99s/it]Global seed set to 0
18it [08:25, 28.53s/it]Global seed set to 0
19it [08:53, 28.30s/it]Global seed set to 0
20it [09:20, 28.14s/it]Global seed set to 0
21it [09:50, 28.50s/it]Global seed set to 0
22it [10:19, 28.76s/it]Global seed set to 0
23

[{'train': 0.4964756965637207, 'val': 0.47004690766334534},
 {'train': 0.5562042891979218, 'val': 0.716667115688324},
 {'train': 0.18289258082707724, 'val': 0.14540649950504303},
 {'train': 0.1358382230003675, 'val': 0.1441783308982849},
 {'train': 0.25404946506023407, 'val': 0.25287356972694397},
 {'train': 0.8872835139433543, 'val': 0.8325582146644592},
 {'train': 0.7534714996814728, 'val': 0.891320526599884},
 {'train': 0.9737930059432983, 'val': 0.8468936085700989},
 {'train': 0.3038840467731158, 'val': 0.21487831324338913},
 {'train': 0.3607357144355774, 'val': 0.31421157717704773},
 {'train': 1.1060670481787787, 'val': 0.8076822459697723},
 {'train': 0.8512726293669807, 'val': 0.4568103104829788},
 {'train': 0.7585168282190958, 'val': 0.6745723634958267},
 {'train': 0.7812414392828941, 'val': 0.43341048061847687},
 {'train': 0.8819457352161407, 'val': 0.48188136518001556},
 {'train': 0.7720297247171402, 'val': 0.4707847237586975},
 {'train': 0.8863369497385892, 'val': 0.627630650

In [12]:
# A forward slash keeps the file inside results/ on every OS; a backslash is a path
# separator only on Windows. The results/ directory has to exist already.
pd.DataFrame(results).to_csv("results/TTS_GAN_synth_TCN.csv", index=False)