In [1]:
import numpy as np
import pandas as pd

from tqdm import tqdm

import torch
from pytorch_lightning import seed_everything

from utils.data import get_hsm_dataset, split_data, log_returns, get_solar_energy_dataset
from utils.metrics import MAPE, WAPE, MAE

from fourier_flows.SequentialFlows import FourierFlow, RealNVP, TimeFlow

In [2]:
hsm_dataset_path = "data/huge_stock_market_dataset/"
solar_energy_dataset_path = "data/solar_energy/"
models_dir = "models/"

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

val_size = 0.0
test_size = 0.0

T = 127
n_samples = 800 * 127  # number of samples generated by QuantGAN

cuda:0


# Fourier Flow

In [8]:
ts_iterator = get_hsm_dataset(hsm_dataset_path, selected_files=f"{hsm_dataset_path}/selected100.csv")
synthetic_path = f"{hsm_dataset_path}synthetic/FourierFlow/"

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")
    
    train_ts = log_returns(time_series)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    seed_everything(0)
    FF_model = FourierFlow(hidden=200, fft_size=len(train_ts), n_flows=10, normalize=False)

    FF_losses = FF_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = FF_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, FF_model, FF_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 1435.699


Global seed set to 0


step: 49 	/ 50 	|	loss: -11635.513
Finished training!
Time Series #1
step: 0 	/ 50 	-	loss: 917.960


Global seed set to 0


step: 49 	/ 50 	|	loss: -7180.839
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 917.967
step: 49 	/ 50 	|	loss: -7195.052
Finished training!
Time Series #3

Global seed set to 0



step: 0 	/ 50 	-	loss: 1435.770
step: 49 	/ 50 	|	loss: -9331.642
Finished training!


Global seed set to 0


Time Series #4
step: 0 	/ 50 	-	loss: 3635.815
step: 49 	/ 50 	|	loss: -28888.609
Finished training!


Global seed set to 0


Time Series #5
step: 0 	/ 50 	-	loss: 446.340
step: 49 	/ 50 	|	loss: -3533.223
Finished training!
Time Series #6

Global seed set to 0



step: 0 	/ 50 	-	loss: 1257.245


Global seed set to 0


step: 49 	/ 50 	|	loss: -9349.171
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 660.187
step: 49 	/ 50 	|	loss: -5783.942
Finished training!
Time Series #8

Global seed set to 0



step: 0 	/ 50 	-	loss: 3204.810
step: 49 	/ 50 	|	loss: -24521.367
Finished training!


Global seed set to 0


Time Series #9
step: 0 	/ 50 	-	loss: 393.875
step: 49 	/ 50 	|	loss: -2922.276
Finished training!
Time Series #10

Global seed set to 0



step: 0 	/ 50 	-	loss: 722.220


Global seed set to 0


step: 49 	/ 50 	|	loss: -4710.403
Finished training!
Time Series #11
step: 0 	/ 50 	-	loss: 915.238


Global seed set to 0


step: 49 	/ 50 	|	loss: -7373.515
Finished training!
Time Series #12
step: 0 	/ 50 	-	loss: 1590.890


Global seed set to 0


step: 49 	/ 50 	|	loss: -12218.986
Finished training!
Time Series #13
step: 0 	/ 50 	-	loss: 918.953


Global seed set to 0


step: 49 	/ 50 	|	loss: -6722.096
Finished training!
Time Series #14
step: 0 	/ 50 	-	loss: 400.681


Global seed set to 0


step: 49 	/ 50 	|	loss: -2824.478
Finished training!
Time Series #15
step: 0 	/ 50 	-	loss: 912.478


Global seed set to 0


step: 49 	/ 50 	|	loss: -6393.878
Finished training!
Time Series #16
step: 0 	/ 50 	-	loss: 2281.896
step: 49 	/ 50 	|	loss: -16158.610
Finished training!


Global seed set to 0


Time Series #17
step: 0 	/ 50 	-	loss: 2245.863
step: 49 	/ 50 	|	loss: -17157.523
Finished training!


Global seed set to 0


Time Series #18
step: 0 	/ 50 	-	loss: 2484.038
step: 49 	/ 50 	|	loss: -18827.602
Finished training!


Global seed set to 0


Time Series #19
step: 0 	/ 50 	-	loss: 410.588
step: 49 	/ 50 	|	loss: -2315.951
Finished training!


Global seed set to 0


Time Series #20
step: 0 	/ 50 	-	loss: 2603.295
step: 49 	/ 50 	|	loss: -20277.570
Finished training!


Global seed set to 0


Time Series #21
step: 0 	/ 50 	-	loss: 3198.224
step: 49 	/ 50 	|	loss: -25290.838
Finished training!


Global seed set to 0


Time Series #22
step: 0 	/ 50 	-	loss: 915.211
step: 49 	/ 50 	|	loss: -7270.840
Finished training!


Global seed set to 0


Time Series #23
step: 0 	/ 50 	-	loss: 2949.801
step: 49 	/ 50 	|	loss: -21690.602
Finished training!


Global seed set to 0


Time Series #24
step: 0 	/ 50 	-	loss: 3146.854
step: 49 	/ 50 	|	loss: -24643.225
Finished training!


Global seed set to 0


Time Series #25
step: 0 	/ 50 	-	loss: 1095.491


Global seed set to 0


step: 49 	/ 50 	|	loss: -7805.714
Finished training!
Time Series #26
step: 0 	/ 50 	-	loss: 1466.219


Global seed set to 0


step: 49 	/ 50 	|	loss: -12162.444
Finished training!
Time Series #27
step: 0 	/ 50 	-	loss: 1010.035


Global seed set to 0


step: 49 	/ 50 	|	loss: -7960.254
Finished training!
Time Series #28
step: 0 	/ 50 	-	loss: 3017.826
step: 49 	/ 50 	|	loss: -23729.287
Finished training!


Global seed set to 0


Time Series #29
step: 0 	/ 50 	-	loss: 699.637


Global seed set to 0


step: 49 	/ 50 	|	loss: -5379.661
Finished training!
Time Series #30
step: 0 	/ 50 	-	loss: 548.267
step: 49 	/ 50 	|	loss: -4258.558
Finished training!
Time Series #31

Global seed set to 0



step: 0 	/ 50 	-	loss: 652.660
step: 49 	/ 50 	|	loss: -4991.904
Finished training!


Global seed set to 0


Time Series #32
step: 0 	/ 50 	-	loss: 887.171


Global seed set to 0


step: 49 	/ 50 	|	loss: -6586.471
Finished training!
Time Series #33
step: 0 	/ 50 	-	loss: 1986.952
step: 49 	/ 50 	|	loss: -15629.572
Finished training!
Time Series #34

Global seed set to 0



step: 0 	/ 50 	-	loss: 1075.489


Global seed set to 0


step: 49 	/ 50 	|	loss: -8942.117
Finished training!
Time Series #35
step: 0 	/ 50 	-	loss: 3119.288
step: 49 	/ 50 	|	loss: -24585.176
Finished training!


Global seed set to 0


Time Series #36
step: 0 	/ 50 	-	loss: 2820.903
step: 49 	/ 50 	|	loss: -20803.307
Finished training!


Global seed set to 0


Time Series #37
step: 0 	/ 50 	-	loss: 2820.903
step: 49 	/ 50 	|	loss: -20803.307
Finished training!


Global seed set to 0


Time Series #38
step: 0 	/ 50 	-	loss: 1330.400


Global seed set to 0


step: 49 	/ 50 	|	loss: -10340.246
Finished training!
Time Series #39
step: 0 	/ 50 	-	loss: 3034.080
step: 49 	/ 50 	|	loss: -24488.543
Finished training!


Global seed set to 0


Time Series #40
step: 0 	/ 50 	-	loss: 1810.914
step: 49 	/ 50 	|	loss: -12899.373
Finished training!
Time Series #41

Global seed set to 0



step: 0 	/ 50 	-	loss: 2663.096
step: 49 	/ 50 	|	loss: -23304.184
Finished training!


Global seed set to 0


Time Series #42
step: 0 	/ 50 	-	loss: 3369.032
step: 49 	/ 50 	|	loss: -23243.318
Finished training!


Global seed set to 0


Time Series #43
step: 0 	/ 50 	-	loss: 1541.457
step: 49 	/ 50 	|	loss: -10865.504
Finished training!


Global seed set to 0


Time Series #44
step: 0 	/ 50 	-	loss: 2059.469
step: 49 	/ 50 	|	loss: -12310.977
Finished training!


Global seed set to 0


Time Series #45
step: 0 	/ 50 	-	loss: 798.292


Global seed set to 0


step: 49 	/ 50 	|	loss: -5887.704
Finished training!
Time Series #46
step: 0 	/ 50 	-	loss: 490.157


Global seed set to 0


step: 49 	/ 50 	|	loss: -3763.029
Finished training!
Time Series #47
step: 0 	/ 50 	-	loss: 912.861


Global seed set to 0


step: 49 	/ 50 	|	loss: -6904.423
Finished training!
Time Series #48
step: 0 	/ 50 	-	loss: 1348.158


Global seed set to 0


step: 49 	/ 50 	|	loss: -9514.721
Finished training!
Time Series #49
step: 0 	/ 50 	-	loss: 915.808


Global seed set to 0


step: 49 	/ 50 	|	loss: -7350.725
Finished training!
Time Series #50
step: 0 	/ 50 	-	loss: 3202.471
step: 49 	/ 50 	|	loss: -25408.855
Finished training!


Global seed set to 0


Time Series #51
step: 0 	/ 50 	-	loss: 1348.220


Global seed set to 0


step: 49 	/ 50 	|	loss: -9665.752
Finished training!
Time Series #52
step: 0 	/ 50 	-	loss: 609.513


Global seed set to 0


step: 49 	/ 50 	|	loss: -4152.338
Finished training!
Time Series #53
step: 0 	/ 50 	-	loss: 2673.720
step: 49 	/ 50 	|	loss: -18126.496
Finished training!


Global seed set to 0


Time Series #54
step: 0 	/ 50 	-	loss: 725.370


Global seed set to 0


step: 49 	/ 50 	|	loss: -5083.606
Finished training!
Time Series #55
step: 0 	/ 50 	-	loss: 2245.856
step: 49 	/ 50 	|	loss: -17639.260
Finished training!


Global seed set to 0


Time Series #56
step: 0 	/ 50 	-	loss: 1052.348
step: 49 	/ 50 	|	loss: -7338.034
Finished training!


Global seed set to 0


Time Series #57
step: 0 	/ 50 	-	loss: 1979.477
step: 49 	/ 50 	|	loss: -14026.289
Finished training!


Global seed set to 0


Time Series #58
step: 0 	/ 50 	-	loss: 1927.423
step: 49 	/ 50 	|	loss: -14561.477
Finished training!


Global seed set to 0


Time Series #59
step: 0 	/ 50 	-	loss: 956.456
step: 49 	/ 50 	|	loss: -7816.380
Finished training!


Global seed set to 0


Time Series #60
step: 0 	/ 50 	-	loss: 3352.896
step: 49 	/ 50 	|	loss: -23562.961
Finished training!


Global seed set to 0


Time Series #61
step: 0 	/ 50 	-	loss: 1862.004
step: 49 	/ 50 	|	loss: -13936.098
Finished training!


Global seed set to 0


Time Series #62
step: 0 	/ 50 	-	loss: 409.259
step: 49 	/ 50 	|	loss: -2787.762
Finished training!
Time Series #63

Global seed set to 0



step: 0 	/ 50 	-	loss: 1501.287
step: 49 	/ 50 	|	loss: -11105.213
Finished training!
Time Series #64

Global seed set to 0



step: 0 	/ 50 	-	loss: 1688.932
step: 49 	/ 50 	|	loss: -11538.662
Finished training!
Time Series #65

Global seed set to 0



step: 0 	/ 50 	-	loss: 722.119


Global seed set to 0


step: 49 	/ 50 	|	loss: -4549.507
Finished training!
Time Series #66
step: 0 	/ 50 	-	loss: 1687.745
step: 49 	/ 50 	|	loss: -14410.542
Finished training!


Global seed set to 0


Time Series #67
step: 0 	/ 50 	-	loss: 2245.872
step: 49 	/ 50 	|	loss: -18473.771
Finished training!


Global seed set to 0


Time Series #68
step: 0 	/ 50 	-	loss: 1362.944


Global seed set to 0


step: 49 	/ 50 	|	loss: -9334.985
Finished training!
Time Series #69
step: 0 	/ 50 	-	loss: 3490.205
step: 49 	/ 50 	|	loss: -26575.375
Finished training!


Global seed set to 0


Time Series #70
step: 0 	/ 50 	-	loss: 1036.250
step: 49 	/ 50 	|	loss: -7861.619
Finished training!
Time Series #71

Global seed set to 0



step: 0 	/ 50 	-	loss: 3079.672
step: 49 	/ 50 	|	loss: -22221.660
Finished training!


Global seed set to 0


Time Series #72
step: 0 	/ 50 	-	loss: 2004.635
step: 49 	/ 50 	|	loss: -15070.824
Finished training!


Global seed set to 0


Time Series #73
step: 0 	/ 50 	-	loss: 2000.785
step: 49 	/ 50 	|	loss: -13527.969
Finished training!


Global seed set to 0


Time Series #74
step: 0 	/ 50 	-	loss: 2245.865
step: 49 	/ 50 	|	loss: -18080.037
Finished training!


Global seed set to 0


Time Series #75
step: 0 	/ 50 	-	loss: 2357.605
step: 49 	/ 50 	|	loss: -16636.145
Finished training!


Global seed set to 0


Time Series #76
step: 0 	/ 50 	-	loss: 1975.907
step: 49 	/ 50 	|	loss: -14374.312
Finished training!


Global seed set to 0


Time Series #77
step: 0 	/ 50 	-	loss: 1393.246
step: 49 	/ 50 	|	loss: -9435.571
Finished training!
Time Series #78

Global seed set to 0



step: 0 	/ 50 	-	loss: 3454.174
step: 49 	/ 50 	|	loss: -27278.721
Finished training!


Global seed set to 0


Time Series #79
step: 0 	/ 50 	-	loss: 3481.122
step: 49 	/ 50 	|	loss: -28064.289
Finished training!


Global seed set to 0


Time Series #80
step: 0 	/ 50 	-	loss: 1680.694
step: 49 	/ 50 	|	loss: -11525.883
Finished training!


Global seed set to 0


Time Series #81
step: 0 	/ 50 	-	loss: 3433.078
step: 49 	/ 50 	|	loss: -23733.467
Finished training!


Global seed set to 0


Time Series #82
step: 0 	/ 50 	-	loss: 1732.575
step: 49 	/ 50 	|	loss: -12913.664
Finished training!


Global seed set to 0


Time Series #83
step: 0 	/ 50 	-	loss: 1617.770
step: 49 	/ 50 	|	loss: -11534.928
Finished training!


Global seed set to 0


Time Series #84
step: 0 	/ 50 	-	loss: 2167.543
step: 49 	/ 50 	|	loss: -19737.393
Finished training!


Global seed set to 0


Time Series #85
step: 0 	/ 50 	-	loss: 1667.963
step: 49 	/ 50 	|	loss: -12838.759
Finished training!


Global seed set to 0


Time Series #86
step: 0 	/ 50 	-	loss: 3381.693
step: 49 	/ 50 	|	loss: -25575.057
Finished training!


Global seed set to 0


Time Series #87
step: 0 	/ 50 	-	loss: 1257.267


Global seed set to 0


step: 49 	/ 50 	|	loss: -8415.230
Finished training!
Time Series #88
step: 0 	/ 50 	-	loss: 652.687


Global seed set to 0


step: 49 	/ 50 	|	loss: -5016.790
Finished training!
Time Series #89
step: 0 	/ 50 	-	loss: 1146.644
step: 49 	/ 50 	|	loss: -7054.898
Finished training!


Global seed set to 0


Time Series #90
step: 0 	/ 50 	-	loss: 2143.130
step: 49 	/ 50 	|	loss: -14747.441
Finished training!


Global seed set to 0


Time Series #91
step: 0 	/ 50 	-	loss: 1052.518
step: 49 	/ 50 	|	loss: -7804.412
Finished training!


Global seed set to 0


Time Series #92
step: 0 	/ 50 	-	loss: 1178.479


Global seed set to 0


step: 49 	/ 50 	|	loss: -9186.564
Finished training!
Time Series #93
step: 0 	/ 50 	-	loss: 693.275


Global seed set to 0


step: 49 	/ 50 	|	loss: -5394.197
Finished training!
Time Series #94
step: 0 	/ 50 	-	loss: 731.484


Global seed set to 0


step: 49 	/ 50 	|	loss: -5520.536
Finished training!
Time Series #95
step: 0 	/ 50 	-	loss: 1354.777


Global seed set to 0


step: 49 	/ 50 	|	loss: -10058.863
Finished training!
Time Series #96
step: 0 	/ 50 	-	loss: 1249.567


Global seed set to 0


step: 49 	/ 50 	|	loss: -9063.104
Finished training!
Time Series #97
step: 0 	/ 50 	-	loss: 3393.610
step: 49 	/ 50 	|	loss: -29642.854
Finished training!


Global seed set to 0


Time Series #98
step: 0 	/ 50 	-	loss: 3481.111
step: 49 	/ 50 	|	loss: -26574.676
Finished training!


Global seed set to 0


Time Series #99
step: 0 	/ 50 	-	loss: 774.763
step: 49 	/ 50 	|	loss: -5006.361
Finished training!


Time: 11 min 35 sec

In [13]:
ts_iterator = get_solar_energy_dataset(solar_energy_dataset_path, max_results=10)
synthetic_path = f"{solar_energy_dataset_path}synthetic/FourierFlow/"

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")
    
    train_ts = time_series[: 10_000]
    train_ts = log_returns(train_ts + 1e-9)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    seed_everything(0)
    FF_model = FourierFlow(hidden=200, fft_size=len(train_ts), n_flows=10, normalize=False)

    FF_losses = FF_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = FF_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, FF_model, FF_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 12285.785
step: 49 	/ 50 	|	loss: -53392.664
Finished training!


Global seed set to 0


Time Series #1
step: 0 	/ 50 	-	loss: 12285.741
step: 49 	/ 50 	|	loss: -54767.930
Finished training!


Global seed set to 0


Time Series #2
step: 0 	/ 50 	-	loss: 12285.340
step: 49 	/ 50 	|	loss: -55719.383
Finished training!


Global seed set to 0


Time Series #3
step: 0 	/ 50 	-	loss: 12283.609
step: 49 	/ 50 	|	loss: -57238.457
Finished training!


Global seed set to 0


Time Series #4
step: 0 	/ 50 	-	loss: 12287.857
step: 49 	/ 50 	|	loss: -54711.328
Finished training!


Global seed set to 0


Time Series #5
step: 0 	/ 50 	-	loss: 12284.594
step: 49 	/ 50 	|	loss: -54467.160
Finished training!


Global seed set to 0


Time Series #6
step: 0 	/ 50 	-	loss: 12287.475
step: 49 	/ 50 	|	loss: -57119.398
Finished training!


Global seed set to 0


Time Series #7
step: 0 	/ 50 	-	loss: 12287.970
step: 49 	/ 50 	|	loss: -54194.988
Finished training!


Global seed set to 0


Time Series #8
step: 0 	/ 50 	-	loss: 12286.185
step: 49 	/ 50 	|	loss: -55841.684
Finished training!


Global seed set to 0


Time Series #9
step: 0 	/ 50 	-	loss: 12287.316
step: 49 	/ 50 	|	loss: -51734.207
Finished training!


Time: 16 min 30 sec

# RealNVP

In [15]:
ts_iterator = get_hsm_dataset(hsm_dataset_path, selected_files=f"{hsm_dataset_path}/selected100.csv")
synthetic_path = f"{hsm_dataset_path}synthetic/RealNVP/"
seed_everything(0)

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")
    
    train_ts = log_returns(time_series)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    RealNVP_model = RealNVP(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

    RealNVP_losses = RealNVP_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = RealNVP_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, RealNVP_model, RealNVP_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 1471.588
step: 49 	/ 50 	|	loss: -8622.956
Finished training!
Time Series #1
step: 0 	/ 50 	-	loss: 975.932
step: 49 	/ 50 	|	loss: -4691.843
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 980.449
step: 49 	/ 50 	|	loss: -4640.819
Finished training!
Time Series #3
step: 0 	/ 50 	-	loss: 1436.910
step: 49 	/ 50 	|	loss: -5758.834
Finished training!
Time Series #4
step: 0 	/ 50 	-	loss: 3636.094
step: 49 	/ 50 	|	loss: -18605.061
Finished training!
Time Series #5
step: 0 	/ 50 	-	loss: 434.827
step: 49 	/ 50 	|	loss: -2586.895
Finished training!
Time Series #6
step: 0 	/ 50 	-	loss: 1219.745
step: 49 	/ 50 	|	loss: -5989.792
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 695.225
step: 49 	/ 50 	|	loss: -4457.428
Finished training!
Time Series #8
step: 0 	/ 50 	-	loss: 3252.448
step: 49 	/ 50 	|	loss: -16789.691
Finished training!
Time Series #9
step: 0 	/ 50 	-	loss: 390.224
step: 49 	/ 50 	|	loss: -2010.325
Finished training!
Tim

Time: 8:22

In [4]:
ts_iterator = get_solar_energy_dataset(solar_energy_dataset_path, max_results=10)
synthetic_path = f"{solar_energy_dataset_path}synthetic/RealNVP/"
seed_everything(0)
start_point = 6
for _ in range(start_point): next(ts_iterator)

for ts_index, time_series in enumerate(ts_iterator, start_point):
    print(f"Time Series #{ts_index}")
    
    train_ts = time_series[: 10_000]
    train_ts = log_returns(train_ts + 1e-9)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    RealNVP_model = RealNVP(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

    RealNVP_losses = RealNVP_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = RealNVP_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, RealNVP_model, RealNVP_losses

Global seed set to 0


Time Series #6
step: 0 	/ 50 	-	loss: 42233.957
step: 49 	/ 50 	|	loss: -98910.711
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 49041.590
step: 49 	/ 50 	|	loss: -96680.203
Finished training!
Time Series #8
step: 0 	/ 50 	-	loss: 48661.266
step: 49 	/ 50 	|	loss: -95805.211
Finished training!
Time Series #9
step: 0 	/ 50 	-	loss: 44090.680
step: 49 	/ 50 	|	loss: -98405.523
Finished training!


Time: ~17 min

# TimeFlow

In [5]:
# synthetic_path = synthetic_path = f"{dataset_path}synthetic/TimeFlow/"
# ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected.csv")
# seed_everything(0)

# for ts_index, time_series in enumerate(ts_iterator):
#     print(f"Time Series #{ts_index}")
    
#     (train_ts, *_), *_ = split_data(time_series, val_size=val_size, test_size=test_size)
#     train_ts = log_returns(train_ts)
#     train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

#     TimeFlow_model = TimeFlow(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

#     TimeFlow_losses = TimeFlow_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
#                             learning_rate=1e-3, display_step=50)

#     synth_data = TimeFlow_model.sample(n_samples // len(train_ts))
#     np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

#     del train_ts, synth_data, TimeFlow_model, TimeFlow_losses

12 ts time: ~30 min

# Similarity

In [6]:
from tqdm import tqdm
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.special import kl_div

from utils.data import get_hsm_dataset, split_data, log_returns

In [8]:
results_dir = Path("results")

In [9]:
sj_div = lambda x, y: (kl_div(x, (x + y) / 2) + kl_div(y, (x + y) / 2)) / 2
min_max_norm = lambda x: (x - x.min()) / (x.max() - x.min())

In [14]:
start_dataset = 0
start_ts = 0

for dataset_path, dataset_name in ((Path("data/huge_stock_market_dataset/"), "hsm"),\
     (Path("data/solar_energy"), "se")):
    if dataset_name == "hsm" and start_dataset == 1: continue
    print(f"processing {dataset_name} dataset")
    for model in ("FourierFlow", "RealNVP"):
        synthetic_path = dataset_path / f"synthetic/{model}/"
        results = {"kl_div": [], "sj_div": []}
        if dataset_name == "hsm":
            ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected100.csv")
        else:
            ts_iterator = get_solar_energy_dataset(dataset_path, max_results=10)

        for ts_index, time_series in tqdm(enumerate(ts_iterator)):
            
            train_ts = log_returns(time_series if dataset_name == "hsm" else time_series[:10_000] + 1e-9)
            train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)].values.flatten()
            train_ts = min_max_norm(train_ts)
            
            synth_tss = np.load(synthetic_path / f"selected{ts_index}.npy")
            kl_div_res = sj_div_res = 0
            for synth_ts in synth_tss:
                synth_ts = min_max_norm(synth_ts)
                # synth_ts = np.histogram(synth_ts, bins=np.arange(start=0, stop=1, step=1/100))[0]
                # train_ts = np.histogram(train_ts, bins=np.arange(start=0, stop=1, step=1/100))[0]
                res = kl_div(synth_ts, train_ts)
                kl_div_res += np.where(np.isinf(res), 0, res).mean()
                sj_div_res += sj_div(synth_ts, train_ts).mean()
            results["kl_div"].append(kl_div_res / len(synth_tss))
            results["sj_div"].append(sj_div_res / len(synth_tss))
        
        pd.DataFrame(results).to_csv(results_dir / f"synth_{dataset_name}_sim_{model}.csv", index=False)

processing hsm dataset


100it [00:02, 47.59it/s]
100it [00:02, 47.42it/s]


processing se dataset


10it [00:00, 22.89it/s]
10it [00:00, 20.63it/s]
