In [1]:
import os

import numpy as np
import pandas as pd

from tqdm import tqdm

import torch
from pytorch_lightning import seed_everything

from utils.data import get_hsm_dataset, split_data, log_returns
from utils.metrics import MAPE, WAPE, MAE

from fourier_flows.SequentialFlows import FourierFlow, RealNVP, TimeFlow

In [2]:
# Root folder of the dataset. The trailing slash matters: sub-paths below are
# built by plain string concatenation.
dataset_path = "data/huge_stock_market_dataset/"
# Folder that receives the generated (synthetic) series of the first model.
synthetic_path = dataset_path + "synthetic/FourierFlow/"
# Folder name reserved for model artifacts (not referenced in the visible cells).
models_dir = "models/"

In [3]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Split fractions handed to split_data() by the cells below.
val_size, test_size = 0.0, 0.3

T = 127
n_samples = 1600 * T  # number of samples generated by QuantGAN

cuda:0


# Fourier Flow

In [4]:
# Fit one FourierFlow per selected time series and store its synthetic samples.
ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected.csv")
seed_everything(0)

# np.save does not create missing folders; make sure the target exists up
# front so a run does not fail only after the first model has been trained.
os.makedirs(synthetic_path, exist_ok=True)

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")

    # Only the training split is used to fit the generative model.
    (train_ts, *_), *_ = split_data(time_series, val_size=val_size, test_size=test_size)
    train_ts = log_returns(train_ts)
    # Keep the longest prefix whose length is congruent to 1 modulo 4
    # (same result as the former ternary expression for every length).
    # NOTE(review): presumably a size constraint of the flow models - confirm.
    usable_len = (len(train_ts) - 1) // 4 * 4 + 1
    train_ts = train_ts[:usable_len]

    FF_model = FourierFlow(hidden=200, fft_size=len(train_ts), n_flows=10, normalize=False)

    # The whole series is passed as a single row of shape (1, len(train_ts)).
    FF_losses = FF_model.fit(
        train_ts.values.reshape(1, -1),
        epochs=50,
        batch_size=128,
        learning_rate=1e-3,
        display_step=50,
    )

    # Request as many sequences as fit into the n_samples budget of points.
    synth_data = FF_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    # Drop the per-series objects before moving on to the next series.
    del train_ts, synth_data, FF_model, FF_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 386.364
step: 49 	/ 50 	|	loss: -1892.182
Finished training!
Time Series #1
step: 0 	/ 50 	-	loss: 778.142
step: 49 	/ 50 	|	loss: -4973.083
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 795.150
step: 49 	/ 50 	|	loss: -5734.431
Finished training!
Time Series #3
step: 0 	/ 50 	-	loss: 672.038
step: 49 	/ 50 	|	loss: -4722.202
Finished training!
Time Series #4
step: 0 	/ 50 	-	loss: 535.835
step: 49 	/ 50 	|	loss: -3467.864
Finished training!
Time Series #5
step: 0 	/ 50 	-	loss: 1487.181
step: 49 	/ 50 	|	loss: -10080.338
Finished training!
Time Series #6
step: 0 	/ 50 	-	loss: 1054.862
step: 49 	/ 50 	|	loss: -7514.502
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 1238.732
step: 49 	/ 50 	|	loss: -8594.308
Finished training!
Time Series #8
step: 0 	/ 50 	-	loss: 1549.401
step: 49 	/ 50 	|	loss: -12172.836
Finished training!
Time Series #9
step: 0 	/ 50 	-	loss: 1218.759
step: 49 	/ 50 	|	loss: -9871.307
Finished training!
Tim

Time: 4:15

# RealNVP

In [5]:
# Fit one RealNVP per selected time series and store its synthetic samples.
synthetic_path = f"{dataset_path}synthetic/RealNVP/"
ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected.csv")
seed_everything(0)

# np.save does not create missing folders; make sure the target exists up
# front so a run does not fail only after the first model has been trained.
os.makedirs(synthetic_path, exist_ok=True)

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")

    # Only the training split is used to fit the generative model.
    (train_ts, *_), *_ = split_data(time_series, val_size=val_size, test_size=test_size)
    train_ts = log_returns(train_ts)
    # Keep the longest prefix whose length is congruent to 1 modulo 4
    # (same result as the former ternary expression for every length).
    # NOTE(review): presumably a size constraint of the flow models - confirm.
    usable_len = (len(train_ts) - 1) // 4 * 4 + 1
    train_ts = train_ts[:usable_len]

    RealNVP_model = RealNVP(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

    # The whole series is passed as a single row of shape (1, len(train_ts)).
    RealNVP_losses = RealNVP_model.fit(
        train_ts.values.reshape(1, -1),
        epochs=50,
        batch_size=128,
        learning_rate=1e-3,
        display_step=50,
    )

    # Request as many sequences as fit into the n_samples budget of points.
    synth_data = RealNVP_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    # Drop the per-series objects before moving on to the next series.
    del train_ts, synth_data, RealNVP_model, RealNVP_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 405.040
step: 49 	/ 50 	|	loss: -1359.304
Finished training!
Time Series #1
step: 0 	/ 50 	-	loss: 786.542
step: 49 	/ 50 	|	loss: -3289.140
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 768.657
step: 49 	/ 50 	|	loss: -4175.608
Finished training!
Time Series #3
step: 0 	/ 50 	-	loss: 640.137
step: 49 	/ 50 	|	loss: -3085.500
Finished training!
Time Series #4
step: 0 	/ 50 	-	loss: 504.549
step: 49 	/ 50 	|	loss: -2321.648
Finished training!
Time Series #5
step: 0 	/ 50 	-	loss: 1436.041
step: 49 	/ 50 	|	loss: -7238.917
Finished training!
Time Series #6
step: 0 	/ 50 	-	loss: 1072.026
step: 49 	/ 50 	|	loss: -5224.252
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 1202.877
step: 49 	/ 50 	|	loss: -5593.020
Finished training!
Time Series #8
step: 0 	/ 50 	-	loss: 1530.266
step: 49 	/ 50 	|	loss: -7939.487
Finished training!
Time Series #9
step: 0 	/ 50 	-	loss: 1226.088
step: 49 	/ 50 	|	loss: -6521.028
Finished training!
Time 

Time: 4:02

# TimeFlow

In [5]:
# Fit one TimeFlow per selected time series and store its synthetic samples.
synthetic_path = f"{dataset_path}synthetic/TimeFlow/"
ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected.csv")
seed_everything(0)

# np.save does not create missing folders; make sure the target exists up
# front so a run does not fail only after the first model has been trained.
os.makedirs(synthetic_path, exist_ok=True)

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")

    # Only the training split is used to fit the generative model.
    (train_ts, *_), *_ = split_data(time_series, val_size=val_size, test_size=test_size)
    train_ts = log_returns(train_ts)
    # Keep the longest prefix whose length is congruent to 1 modulo 4
    # (same result as the former ternary expression for every length).
    # NOTE(review): presumably a size constraint of the flow models - confirm.
    usable_len = (len(train_ts) - 1) // 4 * 4 + 1
    train_ts = train_ts[:usable_len]

    TimeFlow_model = TimeFlow(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

    # The whole series is passed as a single row of shape (1, len(train_ts)).
    TimeFlow_losses = TimeFlow_model.fit(
        train_ts.values.reshape(1, -1),
        epochs=50,
        batch_size=128,
        learning_rate=1e-3,
        display_step=50,
    )

    # Request as many sequences as fit into the n_samples budget of points.
    synth_data = TimeFlow_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    # Drop the per-series objects before moving on to the next series.
    del train_ts, synth_data, TimeFlow_model, TimeFlow_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 454.488
step: 49 	/ 50 	|	loss: -1629.545
Finished training!
Time Series #1
step: 0 	/ 50 	-	loss: 938.817
step: 49 	/ 50 	|	loss: -4023.636
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 890.112
step: 49 	/ 50 	|	loss: -5208.440
Finished training!
Time Series #3
step: 0 	/ 50 	-	loss: 781.526
step: 49 	/ 50 	|	loss: -3923.052
Finished training!
Time Series #4
step: 0 	/ 50 	-	loss: 620.486
step: 49 	/ 50 	|	loss: -2865.744
Finished training!
Time Series #5
step: 0 	/ 50 	-	loss: 1754.506
step: 49 	/ 50 	|	loss: -8762.397
Finished training!
Time Series #6
step: 0 	/ 50 	-	loss: 1332.166
step: 49 	/ 50 	|	loss: -6392.702
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 1433.448
step: 49 	/ 50 	|	loss: -6861.422
Finished training!
Time Series #8
step: 0 	/ 50 	-	loss: 1873.701
step: 49 	/ 50 	|	loss: -9757.703
Finished training!
Time Series #9
step: 0 	/ 50 	-	loss: 1506.357
step: 49 	/ 50 	|	loss: -7991.845
Finished training!
Time 

KeyboardInterrupt: 

Time for 12 time series: ~30 min

# Similarity

In [9]:
from tqdm import tqdm
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.special import kl_div

from utils.data import get_hsm_dataset, split_data, log_returns

In [13]:
# NOTE: from here on dataset_path is a pathlib.Path (the training cells above
# define it as a plain string with a trailing slash).
dataset_path = Path("data") / "huge_stock_market_dataset"
# Folder that receives the per-model similarity CSV files.
results_dir = Path("results")

# Split fractions handed to split_data(); same values as in the training cells.
val_size, test_size = 0.0, 0.3

In [32]:
def sj_div(x, y):
    """Element-wise symmetrised divergence of ``x`` and ``y`` against their midpoint.

    Averages ``kl_div(x, m)`` and ``kl_div(y, m)`` with ``m = (x + y) / 2``.
    The result has the broadcast shape of the inputs; reduce it (e.g. with
    ``.mean()``) to obtain a scalar score.
    """
    midpoint = (x + y) / 2  # computed once and shared by both terms
    return (kl_div(x, midpoint) + kl_div(y, midpoint)) / 2


def min_max_norm(x):
    """Linearly rescale ``x`` so that its minimum maps to 0 and its maximum to 1.

    A constant input has zero range; it is mapped to all zeros instead of the
    NaNs a 0/0 division would produce.
    """
    lowest = x.min()
    value_range = x.max() - lowest
    # Dividing by 1 for a constant series yields zeros and keeps the result float.
    return (x - lowest) / (value_range if value_range != 0 else 1)

In [43]:
# Score every synthetic sample against the real (training) log-returns and
# write one CSV per model with the per-series averages.
# NOTE(review): TimeFlow samples are generated above but not evaluated here -
# confirm whether that omission is intentional.

# DataFrame.to_csv does not create missing folders; create the output folder
# before doing any work so the results are not lost at the very end.
os.makedirs(results_dir, exist_ok=True)

for model in ("FourierFlow", "RealNVP"):
    synthetic_path = dataset_path / f"synthetic/{model}/"
    results = {"kl_div": [], "sj_div": []}
    ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected.csv")

    for ts_index, time_series in enumerate(tqdm(ts_iterator, desc=model)):
        # Rebuild the training series with the same preprocessing as in the
        # training cells, then scale it to [0, 1].
        (train_ts, *_), *_ = split_data(time_series, val_size=val_size, test_size=test_size)
        train_ts = log_returns(train_ts)
        # Longest prefix whose length is congruent to 1 modulo 4 (identical to
        # the truncation applied before training).
        usable_len = (len(train_ts) - 1) // 4 * 4 + 1
        train_ts = min_max_norm(train_ts[:usable_len].values.flatten())

        synth_tss = np.load(synthetic_path / f"selected{ts_index}.npy")
        kl_div_res = sj_div_res = 0
        for synth_ts in synth_tss:
            synth_ts = min_max_norm(synth_ts)
            res = kl_div(synth_ts, train_ts)
            # Infinite element-wise terms are counted as 0 so they do not
            # swamp the average.
            kl_div_res += np.where(np.isinf(res), 0, res).mean()
            sj_div_res += sj_div(synth_ts, train_ts).mean()
        # Average the per-sample scores over all synthetic samples.
        results["kl_div"].append(kl_div_res / len(synth_tss))
        results["sj_div"].append(sj_div_res / len(synth_tss))

    pd.DataFrame(results).to_csv(results_dir / f"synth_sim_{model}.csv", index=False)

24it [00:00, 25.16it/s]
24it [00:00, 25.96it/s]
