In [1]:
import os

import numpy as np
import pandas as pd

from tqdm import tqdm

import torch
from pytorch_lightning import seed_everything

from utils.data import get_hsm_dataset, split_data, log_returns
from utils.metrics import MAPE, WAPE, MAE

from fourier_flows.SequentialFlows import FourierFlow, RealNVP, TimeFlow

In [2]:
# Filesystem layout; every path is relative and carries a trailing "/".
models_dir = "models/"
dataset_path = "data/huge_stock_market_dataset/"
# Default output directory for generated samples. The RealNVP and TimeFlow
# cells further down rebind this name to their own sub-directory.
synthetic_path = f"{dataset_path}synthetic/FourierFlow/"

In [3]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

# Validation / test fractions passed to split_data in the training cells.
val_size, test_size = 0.15, 0.0

T = 127
n_samples = 1600 * T  # number of samples generated by QuantGAN

cuda:0


# Fourier Flow

In [6]:
# Fit one FourierFlow per selected time series and save the generated samples
# as selected<index>.npy.
#
# The output directory is bound here rather than inherited from the config
# cell: the RealNVP and TimeFlow cells rebind `synthetic_path`, so running this
# cell after either of them would otherwise save FourierFlow samples into
# another model's directory.
synthetic_path = f"{dataset_path}synthetic/FourierFlow/"
os.makedirs(synthetic_path, exist_ok=True)  # np.save does not create missing directories

# dataset_path already ends with "/", so no extra separator is inserted.
ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}selected.csv")
seed_everything(0)

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")

    # Only the training part of the split is used in this cell.
    (train_ts, *_), *_ = split_data(time_series, val_size=val_size, test_size=test_size)
    train_ts = log_returns(train_ts)

    # Keep the longest prefix whose length is congruent to 1 modulo 4 (identical
    # to the former two-branch expression).
    # NOTE(review): presumably a length constraint of the flow layers -- confirm.
    usable_len = (len(train_ts) - 1) // 4 * 4 + 1
    train_ts = train_ts[:usable_len]

    FF_model = FourierFlow(hidden=200, fft_size=len(train_ts), n_flows=10, normalize=False)

    # The whole series is passed as a single row of shape (1, len(train_ts)).
    FF_losses = FF_model.fit(train_ts.values.reshape(1, -1), epochs=50, batch_size=128,
                             learning_rate=1e-3, display_step=50)

    # Number of sequences requested so that sequences * length stays within n_samples.
    synth_data = FF_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    # Drop references before the next iteration builds a new model.
    del train_ts, synth_data, FF_model, FF_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 473.580
step: 49 	/ 50 	|	loss: -2398.528
Finished training!
Time Series #1
step: 0 	/ 50 	-	loss: 1005.193
step: 49 	/ 50 	|	loss: -5915.730
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 912.041
step: 49 	/ 50 	|	loss: -7006.384
Finished training!
Time Series #3
step: 0 	/ 50 	-	loss: 794.596
step: 49 	/ 50 	|	loss: -5858.341
Finished training!
Time Series #4
step: 0 	/ 50 	-	loss: 643.179
step: 49 	/ 50 	|	loss: -4312.153
Finished training!
Time Series #5
step: 0 	/ 50 	-	loss: 1800.049
step: 49 	/ 50 	|	loss: -12257.492
Finished training!
Time Series #6
step: 0 	/ 50 	-	loss: 1323.650
step: 49 	/ 50 	|	loss: -9180.222
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 1501.057
step: 49 	/ 50 	|	loss: -10624.238
Finished training!
Time Series #8
step: 0 	/ 50 	-	loss: 1872.619
step: 49 	/ 50 	|	loss: -14972.262
Finished training!
Time Series #9
step: 0 	/ 50 	-	loss: 1567.120
step: 49 	/ 50 	|	loss: -11942.075
Finished training!


# RealNVP

In [4]:
# Fit one RealNVP flow per selected time series and save the generated samples
# as selected<index>.npy under the RealNVP output directory.
synthetic_path = f"{dataset_path}synthetic/RealNVP/"
os.makedirs(synthetic_path, exist_ok=True)  # np.save does not create missing directories

# dataset_path already ends with "/", so no extra separator is inserted.
ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}selected.csv")
seed_everything(0)

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")

    # Only the training part of the split is used in this cell.
    (train_ts, *_), *_ = split_data(time_series, val_size=val_size, test_size=test_size)
    train_ts = log_returns(train_ts)

    # Keep the longest prefix whose length is congruent to 1 modulo 4 (identical
    # to the former two-branch expression).
    # NOTE(review): presumably a length constraint of the flow layers -- confirm.
    usable_len = (len(train_ts) - 1) // 4 * 4 + 1
    train_ts = train_ts[:usable_len]

    RealNVP_model = RealNVP(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

    # The whole series is passed as a single row of shape (1, len(train_ts)).
    RealNVP_losses = RealNVP_model.fit(train_ts.values.reshape(1, -1), epochs=50, batch_size=128,
                                       learning_rate=1e-3, display_step=50)

    # Number of sequences requested so that sequences * length stays within n_samples.
    synth_data = RealNVP_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    # Drop references before the next iteration builds a new model.
    del train_ts, synth_data, RealNVP_model, RealNVP_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 465.672
step: 49 	/ 50 	|	loss: -1702.540
Finished training!
Time Series #1
step: 0 	/ 50 	-	loss: 965.470
step: 49 	/ 50 	|	loss: -3922.813
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 935.177
step: 49 	/ 50 	|	loss: -5050.912
Finished training!
Time Series #3
step: 0 	/ 50 	-	loss: 804.124
step: 49 	/ 50 	|	loss: -3909.104
Finished training!
Time Series #4
step: 0 	/ 50 	-	loss: 673.769
step: 49 	/ 50 	|	loss: -2838.981
Finished training!
Time Series #5
step: 0 	/ 50 	-	loss: 1795.220
step: 49 	/ 50 	|	loss: -8463.556
Finished training!
Time Series #6
step: 0 	/ 50 	-	loss: 1314.388
step: 49 	/ 50 	|	loss: -6345.308
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 1500.876
step: 49 	/ 50 	|	loss: -6804.180
Finished training!
Time Series #8
step: 0 	/ 50 	-	loss: 1859.555
step: 49 	/ 50 	|	loss: -9526.939
Finished training!
Time Series #9
step: 0 	/ 50 	-	loss: 1563.881
step: 49 	/ 50 	|	loss: -7943.099
Finished training!
Time 

Elapsed time (RealNVP run): 5:37.2

# TimeFlow

In [5]:
# Fit one TimeFlow model per selected time series and save the generated samples
# as selected<index>.npy under the TimeFlow output directory.
synthetic_path = f"{dataset_path}synthetic/TimeFlow/"
os.makedirs(synthetic_path, exist_ok=True)  # np.save does not create missing directories

# dataset_path already ends with "/", so no extra separator is inserted.
ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}selected.csv")
seed_everything(0)

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")

    # Only the training part of the split is used in this cell.
    (train_ts, *_), *_ = split_data(time_series, val_size=val_size, test_size=test_size)
    train_ts = log_returns(train_ts)

    # Keep the longest prefix whose length is congruent to 1 modulo 4 (identical
    # to the former two-branch expression).
    # NOTE(review): presumably a length constraint of the flow layers -- confirm.
    usable_len = (len(train_ts) - 1) // 4 * 4 + 1
    train_ts = train_ts[:usable_len]

    TimeFlow_model = TimeFlow(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

    # The whole series is passed as a single row of shape (1, len(train_ts)).
    TimeFlow_losses = TimeFlow_model.fit(train_ts.values.reshape(1, -1), epochs=50, batch_size=128,
                                         learning_rate=1e-3, display_step=50)

    # Number of sequences requested so that sequences * length stays within n_samples.
    synth_data = TimeFlow_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    # Drop references before the next iteration builds a new model.
    del train_ts, synth_data, TimeFlow_model, TimeFlow_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 454.488
step: 49 	/ 50 	|	loss: -1629.545
Finished training!
Time Series #1
step: 0 	/ 50 	-	loss: 938.817
step: 49 	/ 50 	|	loss: -4023.636
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 890.112
step: 49 	/ 50 	|	loss: -5208.440
Finished training!
Time Series #3
step: 0 	/ 50 	-	loss: 781.526
step: 49 	/ 50 	|	loss: -3923.052
Finished training!
Time Series #4
step: 0 	/ 50 	-	loss: 620.486
step: 49 	/ 50 	|	loss: -2865.744
Finished training!
Time Series #5
step: 0 	/ 50 	-	loss: 1754.506
step: 49 	/ 50 	|	loss: -8762.397
Finished training!
Time Series #6
step: 0 	/ 50 	-	loss: 1332.166
step: 49 	/ 50 	|	loss: -6392.702
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 1433.448
step: 49 	/ 50 	|	loss: -6861.422
Finished training!
Time Series #8
step: 0 	/ 50 	-	loss: 1873.701
step: 49 	/ 50 	|	loss: -9757.703
Finished training!
Time Series #9
step: 0 	/ 50 	-	loss: 1506.357
step: 49 	/ 50 	|	loss: -7991.845
Finished training!
Time 

KeyboardInterrupt: 

Elapsed time for 12 time series: ~30 min