In [4]:
import numpy as np
import pandas as pd

from tqdm import tqdm

import torch
from pytorch_lightning import seed_everything

from utils.data import get_hsm_dataset, split_data, log_returns, get_solar_energy_dataset, get_fuel_prices_dataset, get_passengers_dataset
from utils.metrics import MAPE, WAPE, MAE

from fourier_flows.SequentialFlows import FourierFlow, RealNVP, TimeFlow

In [5]:
hsm_dataset_path = "data/huge_stock_market_dataset/"
solar_energy_dataset_path = "data/solar_energy/"
fuel_prices_dataset_path = "data/fuel_prices/"
passengers_dataset_path = "data/air_passengers/"
models_dir = "models/"

In [6]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

val_size = 0.0
test_size = 0.0

T = 127
n_samples = 800 * 127  # number of samples generated by QuantGAN

cpu


# Fourier Flow

In [None]:
ts_iterator = get_hsm_dataset(hsm_dataset_path, selected_files=f"{hsm_dataset_path}/selected100.csv")
synthetic_path = f"{hsm_dataset_path}synthetic/FourierFlow/"

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")
    
    train_ts = log_returns(time_series)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    seed_everything(0)
    FF_model = FourierFlow(hidden=200, fft_size=len(train_ts), n_flows=10, normalize=False)

    FF_losses = FF_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = FF_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, FF_model, FF_losses

Time: 11 min 35 sec

In [None]:
ts_iterator = get_solar_energy_dataset(solar_energy_dataset_path, max_results=10)
synthetic_path = f"{solar_energy_dataset_path}synthetic/FourierFlow/"

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")
    
    train_ts = time_series[: 10_000]
    train_ts = log_returns(train_ts + 1e-9)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    seed_everything(0)
    FF_model = FourierFlow(hidden=200, fft_size=len(train_ts), n_flows=10, normalize=False)

    FF_losses = FF_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = FF_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, FF_model, FF_losses

Time: 16 min 30 sec

In [8]:
ts_iterator = get_fuel_prices_dataset(fuel_prices_dataset_path)
synthetic_path = f"{fuel_prices_dataset_path}synthetic/FourierFlow/"

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")
    
    train_ts = log_returns(time_series + 1e-9)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    seed_everything(0)
    FF_model = FourierFlow(hidden=200, fft_size=len(train_ts), n_flows=10, normalize=False)

    FF_losses = FF_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = FF_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, FF_model, FF_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 1007.196
step: 49 	/ 50 	|	loss: -8093.336
Finished training!


Global seed set to 0


Time Series #1
step: 0 	/ 50 	-	loss: 1007.205


Global seed set to 0


step: 49 	/ 50 	|	loss: -7881.022
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 1007.216
step: 49 	/ 50 	|	loss: -7869.522
Finished training!
Time Series #3

Global seed set to 0



step: 0 	/ 50 	-	loss: 1007.192


Global seed set to 0


step: 49 	/ 50 	|	loss: -8020.964
Finished training!
Time Series #4
step: 0 	/ 50 	-	loss: 1007.203


Global seed set to 0


step: 49 	/ 50 	|	loss: -7427.208
Finished training!
Time Series #5
step: 0 	/ 50 	-	loss: 1007.219


Global seed set to 0


step: 49 	/ 50 	|	loss: -7134.164
Finished training!
Time Series #6
step: 0 	/ 50 	-	loss: 1075.489


Global seed set to 0


step: 49 	/ 50 	|	loss: -6283.044
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 1075.459
step: 49 	/ 50 	|	loss: -6265.708
Finished training!


In [10]:
ts_iterator = get_passengers_dataset(passengers_dataset_path)
synthetic_path = f"{passengers_dataset_path}synthetic/FourierFlow/"

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")
    
    train_ts = log_returns(time_series + 1e-9)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    seed_everything(0)
    FF_model = FourierFlow(hidden=200, fft_size=len(train_ts), n_flows=10, normalize=False)

    FF_losses = FF_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = FF_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, FF_model, FF_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 305.136
step: 49 	/ 50 	|	loss: -1540.355
Finished training!


Global seed set to 0


Time Series #1
step: 0 	/ 50 	-	loss: 305.307
step: 49 	/ 50 	|	loss: -1468.619
Finished training!


Global seed set to 0


Time Series #2
step: 0 	/ 50 	-	loss: 295.896
step: 49 	/ 50 	|	loss: -1172.271
Finished training!


Global seed set to 0


Time Series #3
step: 0 	/ 50 	-	loss: 293.935
step: 49 	/ 50 	|	loss: -876.924
Finished training!


Global seed set to 0


Time Series #4
step: 0 	/ 50 	-	loss: 292.986
step: 49 	/ 50 	|	loss: -830.133
Finished training!


Global seed set to 0


Time Series #5
step: 0 	/ 50 	-	loss: 304.910
step: 49 	/ 50 	|	loss: -1492.005
Finished training!


Global seed set to 0


Time Series #6
step: 0 	/ 50 	-	loss: 321.777
step: 49 	/ 50 	|	loss: -807.764
Finished training!


Global seed set to 0


Time Series #7
step: 0 	/ 50 	-	loss: 305.810
step: 49 	/ 50 	|	loss: -1498.130
Finished training!


Global seed set to 0


Time Series #8
step: 0 	/ 50 	-	loss: 270.687
step: 49 	/ 50 	|	loss: -1239.260
Finished training!


Global seed set to 0


Time Series #9
step: 0 	/ 50 	-	loss: 279.194
step: 49 	/ 50 	|	loss: -1110.985
Finished training!


Global seed set to 0


Time Series #10
step: 0 	/ 50 	-	loss: 280.413
step: 49 	/ 50 	|	loss: -1077.194
Finished training!


Global seed set to 0


Time Series #11
step: 0 	/ 50 	-	loss: 280.392
step: 49 	/ 50 	|	loss: -1277.495
Finished training!


Global seed set to 0


Time Series #12
step: 0 	/ 50 	-	loss: 305.588
step: 49 	/ 50 	|	loss: -1478.924
Finished training!


Global seed set to 0


Time Series #13
step: 0 	/ 50 	-	loss: 279.781
step: 49 	/ 50 	|	loss: -1240.741
Finished training!


Global seed set to 0


Time Series #14
step: 0 	/ 50 	-	loss: 272.322
step: 49 	/ 50 	|	loss: -795.190
Finished training!


Global seed set to 0


Time Series #15
step: 0 	/ 50 	-	loss: 305.586
step: 49 	/ 50 	|	loss: -1521.450
Finished training!


Global seed set to 0


Time Series #16
step: 0 	/ 50 	-	loss: 305.523
step: 49 	/ 50 	|	loss: -1492.632
Finished training!


Global seed set to 0


Time Series #17
step: 0 	/ 50 	-	loss: 305.377
step: 49 	/ 50 	|	loss: -1393.289
Finished training!


Global seed set to 0


Time Series #18
step: 0 	/ 50 	-	loss: 304.985
step: 49 	/ 50 	|	loss: -1534.810
Finished training!


Global seed set to 0


Time Series #19
step: 0 	/ 50 	-	loss: 304.170
step: 49 	/ 50 	|	loss: -865.577
Finished training!


Global seed set to 0


Time Series #20
step: 0 	/ 50 	-	loss: 305.549
step: 49 	/ 50 	|	loss: -1432.996
Finished training!


Global seed set to 0


Time Series #21
step: 0 	/ 50 	-	loss: 271.014
step: 49 	/ 50 	|	loss: -1138.829
Finished training!


Global seed set to 0


Time Series #22
step: 0 	/ 50 	-	loss: 305.213
step: 49 	/ 50 	|	loss: -1421.746
Finished training!


Global seed set to 0


Time Series #23
step: 0 	/ 50 	-	loss: 271.112
step: 49 	/ 50 	|	loss: -1242.507
Finished training!


Global seed set to 0


Time Series #24
step: 0 	/ 50 	-	loss: 305.407
step: 49 	/ 50 	|	loss: -1396.866
Finished training!


Global seed set to 0


Time Series #25
step: 0 	/ 50 	-	loss: 280.317
step: 49 	/ 50 	|	loss: -1251.527
Finished training!


Global seed set to 0


Time Series #26
step: 0 	/ 50 	-	loss: 305.269
step: 49 	/ 50 	|	loss: -1443.604
Finished training!


Global seed set to 0


Time Series #27
step: 0 	/ 50 	-	loss: 305.496
step: 49 	/ 50 	|	loss: -1386.807
Finished training!


Global seed set to 0


Time Series #28
step: 0 	/ 50 	-	loss: 317.863
step: 49 	/ 50 	|	loss: -745.193
Finished training!


Global seed set to 0


Time Series #29
step: 0 	/ 50 	-	loss: 305.734
step: 49 	/ 50 	|	loss: -1606.737
Finished training!


Global seed set to 0


Time Series #30
step: 0 	/ 50 	-	loss: 278.369
step: 49 	/ 50 	|	loss: -1241.460
Finished training!


Global seed set to 0


Time Series #31
step: 0 	/ 50 	-	loss: 308.394
step: 49 	/ 50 	|	loss: -846.803
Finished training!


Global seed set to 0


Time Series #32
step: 0 	/ 50 	-	loss: 280.073
step: 49 	/ 50 	|	loss: -1222.765
Finished training!


Global seed set to 0


Time Series #33
step: 0 	/ 50 	-	loss: 299.678
step: 49 	/ 50 	|	loss: -1272.521
Finished training!


Global seed set to 0


Time Series #34
step: 0 	/ 50 	-	loss: 305.566
step: 49 	/ 50 	|	loss: -1422.877
Finished training!


Global seed set to 0


Time Series #35
step: 0 	/ 50 	-	loss: 305.237
step: 49 	/ 50 	|	loss: -1397.380
Finished training!


Global seed set to 0


Time Series #36
step: 0 	/ 50 	-	loss: 283.414
step: 49 	/ 50 	|	loss: -912.099
Finished training!


Global seed set to 0


Time Series #37
step: 0 	/ 50 	-	loss: 251.383
step: 49 	/ 50 	|	loss: -1146.078
Finished training!


Global seed set to 0


Time Series #38
step: 0 	/ 50 	-	loss: 306.207
step: 49 	/ 50 	|	loss: -1296.089
Finished training!


Global seed set to 0


Time Series #39
step: 0 	/ 50 	-	loss: 305.448
step: 49 	/ 50 	|	loss: -1388.202
Finished training!


Global seed set to 0


Time Series #40
step: 0 	/ 50 	-	loss: 299.116
step: 49 	/ 50 	|	loss: -1383.200
Finished training!


Global seed set to 0


Time Series #41
step: 0 	/ 50 	-	loss: 305.562
step: 49 	/ 50 	|	loss: -1356.103
Finished training!


Global seed set to 0


Time Series #42
step: 0 	/ 50 	-	loss: 279.883
step: 49 	/ 50 	|	loss: -988.251
Finished training!


Global seed set to 0


Time Series #43
step: 0 	/ 50 	-	loss: 283.290
step: 49 	/ 50 	|	loss: -806.581
Finished training!


Global seed set to 0


Time Series #44
step: 0 	/ 50 	-	loss: 305.500
step: 49 	/ 50 	|	loss: -1477.191
Finished training!


Global seed set to 0


Time Series #45
step: 0 	/ 50 	-	loss: 305.877
step: 49 	/ 50 	|	loss: -1399.636
Finished training!


Global seed set to 0


Time Series #46
step: 0 	/ 50 	-	loss: 280.183
step: 49 	/ 50 	|	loss: -1152.755
Finished training!


Global seed set to 0


Time Series #47
step: 0 	/ 50 	-	loss: 304.777
step: 49 	/ 50 	|	loss: -935.172
Finished training!


Global seed set to 0


Time Series #48
step: 0 	/ 50 	-	loss: 305.422
step: 49 	/ 50 	|	loss: -1502.914
Finished training!


Global seed set to 0


Time Series #49
step: 0 	/ 50 	-	loss: 305.243
step: 49 	/ 50 	|	loss: -1429.938
Finished training!


Global seed set to 0


Time Series #50
step: 0 	/ 50 	-	loss: 272.547
step: 49 	/ 50 	|	loss: -799.953
Finished training!


Global seed set to 0


Time Series #51
step: 0 	/ 50 	-	loss: 305.391
step: 49 	/ 50 	|	loss: -1495.573
Finished training!


Global seed set to 0


Time Series #52
step: 0 	/ 50 	-	loss: 300.912
step: 49 	/ 50 	|	loss: -1301.208
Finished training!


Global seed set to 0


Time Series #53
step: 0 	/ 50 	-	loss: 301.106
step: 49 	/ 50 	|	loss: -1296.462
Finished training!


Global seed set to 0


Time Series #54
step: 0 	/ 50 	-	loss: 305.439
step: 49 	/ 50 	|	loss: -1483.470
Finished training!


Global seed set to 0


Time Series #55
step: 0 	/ 50 	-	loss: 265.876
step: 49 	/ 50 	|	loss: -731.281
Finished training!


Global seed set to 0


Time Series #56
step: 0 	/ 50 	-	loss: 305.610
step: 49 	/ 50 	|	loss: -1279.839
Finished training!


Global seed set to 0


Time Series #57
step: 0 	/ 50 	-	loss: 271.170
step: 49 	/ 50 	|	loss: -1220.361
Finished training!


Global seed set to 0


Time Series #58
step: 0 	/ 50 	-	loss: 306.294
step: 49 	/ 50 	|	loss: -1315.831
Finished training!


Global seed set to 0


Time Series #59
step: 0 	/ 50 	-	loss: 298.442
step: 49 	/ 50 	|	loss: -1169.684
Finished training!


Global seed set to 0


Time Series #60
step: 0 	/ 50 	-	loss: 298.012
step: 49 	/ 50 	|	loss: -875.660
Finished training!


Global seed set to 0


Time Series #61
step: 0 	/ 50 	-	loss: 305.600
step: 49 	/ 50 	|	loss: -1519.421
Finished training!


Global seed set to 0


Time Series #62
step: 0 	/ 50 	-	loss: 302.807
step: 49 	/ 50 	|	loss: -878.268
Finished training!


Global seed set to 0


Time Series #63
step: 0 	/ 50 	-	loss: 305.297
step: 49 	/ 50 	|	loss: -1481.498
Finished training!


Global seed set to 0


Time Series #64
step: 0 	/ 50 	-	loss: 301.360
step: 49 	/ 50 	|	loss: -1485.479
Finished training!


Global seed set to 0


Time Series #65
step: 0 	/ 50 	-	loss: 305.385
step: 49 	/ 50 	|	loss: -960.989
Finished training!


Global seed set to 0


Time Series #66
step: 0 	/ 50 	-	loss: 271.989
step: 49 	/ 50 	|	loss: -833.322
Finished training!


Global seed set to 0


Time Series #67
step: 0 	/ 50 	-	loss: 287.282
step: 49 	/ 50 	|	loss: -731.736
Finished training!


Global seed set to 0


Time Series #68
step: 0 	/ 50 	-	loss: 305.739
step: 49 	/ 50 	|	loss: -1347.890
Finished training!


Global seed set to 0


Time Series #69
step: 0 	/ 50 	-	loss: 260.629
step: 49 	/ 50 	|	loss: -766.615
Finished training!


Global seed set to 0


Time Series #70
step: 0 	/ 50 	-	loss: 306.457
step: 49 	/ 50 	|	loss: -1307.295
Finished training!


Global seed set to 0


Time Series #71
step: 0 	/ 50 	-	loss: 288.299
step: 49 	/ 50 	|	loss: -644.511
Finished training!


Global seed set to 0


Time Series #72
step: 0 	/ 50 	-	loss: 305.842
step: 49 	/ 50 	|	loss: -1456.309
Finished training!


Global seed set to 0


Time Series #73
step: 0 	/ 50 	-	loss: 266.825
step: 49 	/ 50 	|	loss: -680.646
Finished training!


Global seed set to 0


Time Series #74
step: 0 	/ 50 	-	loss: 289.711
step: 49 	/ 50 	|	loss: -784.002
Finished training!


Global seed set to 0


Time Series #75
step: 0 	/ 50 	-	loss: 305.582
step: 49 	/ 50 	|	loss: -1464.402
Finished training!


Global seed set to 0


Time Series #76
step: 0 	/ 50 	-	loss: 305.698
step: 49 	/ 50 	|	loss: -1500.163
Finished training!


Global seed set to 0


Time Series #77
step: 0 	/ 50 	-	loss: 305.409
step: 49 	/ 50 	|	loss: -1533.683
Finished training!


Global seed set to 0


Time Series #78
step: 0 	/ 50 	-	loss: 349.563
step: 49 	/ 50 	|	loss: -846.140
Finished training!


Global seed set to 0


Time Series #79
step: 0 	/ 50 	-	loss: 305.537
step: 49 	/ 50 	|	loss: -1486.837
Finished training!


Global seed set to 0


Time Series #80
step: 0 	/ 50 	-	loss: 252.899
step: 49 	/ 50 	|	loss: -929.442
Finished training!


Global seed set to 0


Time Series #81
step: 0 	/ 50 	-	loss: 305.126
step: 49 	/ 50 	|	loss: -1274.310
Finished training!


Global seed set to 0


Time Series #82
step: 0 	/ 50 	-	loss: 301.991
step: 49 	/ 50 	|	loss: -829.402
Finished training!


Global seed set to 0


Time Series #83
step: 0 	/ 50 	-	loss: 305.367
step: 49 	/ 50 	|	loss: -1365.817
Finished training!


Global seed set to 0


Time Series #84
step: 0 	/ 50 	-	loss: 294.782
step: 49 	/ 50 	|	loss: -1160.227
Finished training!


Global seed set to 0


Time Series #85
step: 0 	/ 50 	-	loss: 279.497
step: 49 	/ 50 	|	loss: -741.510
Finished training!


Global seed set to 0


Time Series #86
step: 0 	/ 50 	-	loss: 305.463
step: 49 	/ 50 	|	loss: -1464.388
Finished training!


Global seed set to 0


Time Series #87
step: 0 	/ 50 	-	loss: 251.155
step: 49 	/ 50 	|	loss: -1089.285
Finished training!


Global seed set to 0


Time Series #88
step: 0 	/ 50 	-	loss: 305.724
step: 49 	/ 50 	|	loss: -1487.976
Finished training!


Global seed set to 0


Time Series #89
step: 0 	/ 50 	-	loss: 305.892
step: 49 	/ 50 	|	loss: -1350.259
Finished training!


Global seed set to 0


Time Series #90
step: 0 	/ 50 	-	loss: 305.492
step: 49 	/ 50 	|	loss: -1461.350
Finished training!


Global seed set to 0


Time Series #91
step: 0 	/ 50 	-	loss: 305.478
step: 49 	/ 50 	|	loss: -1434.512
Finished training!


Global seed set to 0


Time Series #92
step: 0 	/ 50 	-	loss: 306.056
step: 49 	/ 50 	|	loss: -1382.760
Finished training!


Global seed set to 0


Time Series #93
step: 0 	/ 50 	-	loss: 261.212
step: 49 	/ 50 	|	loss: -748.993
Finished training!


Global seed set to 0


Time Series #94
step: 0 	/ 50 	-	loss: 305.527
step: 49 	/ 50 	|	loss: -1499.746
Finished training!


Global seed set to 0


Time Series #95
step: 0 	/ 50 	-	loss: 281.305
step: 49 	/ 50 	|	loss: -1120.865
Finished training!


Global seed set to 0


Time Series #96
step: 0 	/ 50 	-	loss: 270.053
step: 49 	/ 50 	|	loss: -701.619
Finished training!


Global seed set to 0


Time Series #97
step: 0 	/ 50 	-	loss: 305.386
step: 49 	/ 50 	|	loss: -1505.291
Finished training!


Global seed set to 0


Time Series #98
step: 0 	/ 50 	-	loss: 305.370
step: 49 	/ 50 	|	loss: -1581.360
Finished training!


Global seed set to 0


Time Series #99
step: 0 	/ 50 	-	loss: 270.948
step: 49 	/ 50 	|	loss: -797.716
Finished training!


# RealNVP

In [15]:
ts_iterator = get_hsm_dataset(hsm_dataset_path, selected_files=f"{hsm_dataset_path}/selected100.csv")
synthetic_path = f"{hsm_dataset_path}synthetic/RealNVP/"
seed_everything(0)

for ts_index, time_series in enumerate(ts_iterator):
    print(f"Time Series #{ts_index}")
    
    train_ts = log_returns(time_series)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    RealNVP_model = RealNVP(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

    RealNVP_losses = RealNVP_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = RealNVP_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, RealNVP_model, RealNVP_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 1471.588
step: 49 	/ 50 	|	loss: -8622.956
Finished training!
Time Series #1
step: 0 	/ 50 	-	loss: 975.932
step: 49 	/ 50 	|	loss: -4691.843
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 980.449
step: 49 	/ 50 	|	loss: -4640.819
Finished training!
Time Series #3
step: 0 	/ 50 	-	loss: 1436.910
step: 49 	/ 50 	|	loss: -5758.834
Finished training!
Time Series #4
step: 0 	/ 50 	-	loss: 3636.094
step: 49 	/ 50 	|	loss: -18605.061
Finished training!
Time Series #5
step: 0 	/ 50 	-	loss: 434.827
step: 49 	/ 50 	|	loss: -2586.895
Finished training!
Time Series #6
step: 0 	/ 50 	-	loss: 1219.745
step: 49 	/ 50 	|	loss: -5989.792
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 695.225
step: 49 	/ 50 	|	loss: -4457.428
Finished training!
Time Series #8
step: 0 	/ 50 	-	loss: 3252.448
step: 49 	/ 50 	|	loss: -16789.691
Finished training!
Time Series #9
step: 0 	/ 50 	-	loss: 390.224
step: 49 	/ 50 	|	loss: -2010.325
Finished training!
Tim

Time: 8:22

In [4]:
ts_iterator = get_solar_energy_dataset(solar_energy_dataset_path, max_results=10)
synthetic_path = f"{solar_energy_dataset_path}synthetic/RealNVP/"
seed_everything(0)
start_point = 6
for _ in range(start_point): next(ts_iterator)

for ts_index, time_series in enumerate(ts_iterator, start_point):
    print(f"Time Series #{ts_index}")
    
    train_ts = time_series[: 10_000]
    train_ts = log_returns(train_ts + 1e-9)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    RealNVP_model = RealNVP(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

    RealNVP_losses = RealNVP_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = RealNVP_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, RealNVP_model, RealNVP_losses

Global seed set to 0


Time Series #6
step: 0 	/ 50 	-	loss: 42233.957
step: 49 	/ 50 	|	loss: -98910.711
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 49041.590
step: 49 	/ 50 	|	loss: -96680.203
Finished training!
Time Series #8
step: 0 	/ 50 	-	loss: 48661.266
step: 49 	/ 50 	|	loss: -95805.211
Finished training!
Time Series #9
step: 0 	/ 50 	-	loss: 44090.680
step: 49 	/ 50 	|	loss: -98405.523
Finished training!


Time: ~17 min

In [12]:
ts_iterator = get_fuel_prices_dataset(fuel_prices_dataset_path)
synthetic_path = f"{fuel_prices_dataset_path}synthetic/RealNVP/"
seed_everything(0)
start_point = 0
for _ in range(start_point): next(ts_iterator)

for ts_index, time_series in enumerate(ts_iterator, start_point):
    print(f"Time Series #{ts_index}")
    
    train_ts = time_series
    train_ts = log_returns(train_ts + 1e-9)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    RealNVP_model = RealNVP(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

    RealNVP_losses = RealNVP_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = RealNVP_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, RealNVP_model, RealNVP_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 996.548
step: 49 	/ 50 	|	loss: -5621.836
Finished training!
Time Series #1
step: 0 	/ 50 	-	loss: 1008.333
step: 49 	/ 50 	|	loss: -5625.384
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 1005.361
step: 49 	/ 50 	|	loss: -5210.261
Finished training!
Time Series #3
step: 0 	/ 50 	-	loss: 1031.542
step: 49 	/ 50 	|	loss: -6524.273
Finished training!
Time Series #4
step: 0 	/ 50 	-	loss: 1011.315
step: 49 	/ 50 	|	loss: -4968.704
Finished training!
Time Series #5
step: 0 	/ 50 	-	loss: 1003.746
step: 49 	/ 50 	|	loss: -4339.809
Finished training!
Time Series #6
step: 0 	/ 50 	-	loss: 1155.681
step: 49 	/ 50 	|	loss: -2741.663
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 1201.437
step: 49 	/ 50 	|	loss: -2789.180
Finished training!


In [13]:
ts_iterator = get_passengers_dataset(passengers_dataset_path)
synthetic_path = f"{passengers_dataset_path}synthetic/RealNVP/"
seed_everything(0)
start_point = 0
for _ in range(start_point): next(ts_iterator)

for ts_index, time_series in enumerate(ts_iterator, start_point):
    print(f"Time Series #{ts_index}")
    
    train_ts = time_series
    train_ts = log_returns(train_ts + 1e-9)
    train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

    RealNVP_model = RealNVP(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

    RealNVP_losses = RealNVP_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
                            learning_rate=1e-3, display_step=50)

    synth_data = RealNVP_model.sample(n_samples // len(train_ts))
    np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

    del train_ts, synth_data, RealNVP_model, RealNVP_losses

Global seed set to 0


Time Series #0
step: 0 	/ 50 	-	loss: 313.163
step: 49 	/ 50 	|	loss: -885.714
Finished training!
Time Series #1
step: 0 	/ 50 	-	loss: 314.108
step: 49 	/ 50 	|	loss: -910.609
Finished training!
Time Series #2
step: 0 	/ 50 	-	loss: 480.433
step: 49 	/ 50 	|	loss: -590.479
Finished training!
Time Series #3
step: 0 	/ 50 	-	loss: 3269.207
step: 49 	/ 50 	|	loss: -714.761
Finished training!
Time Series #4
step: 0 	/ 50 	-	loss: 2369.000
step: 49 	/ 50 	|	loss: -652.783
Finished training!
Time Series #5
step: 0 	/ 50 	-	loss: 355.548
step: 49 	/ 50 	|	loss: -816.101
Finished training!
Time Series #6
step: 0 	/ 50 	-	loss: 4555.938
step: 49 	/ 50 	|	loss: -616.586
Finished training!
Time Series #7
step: 0 	/ 50 	-	loss: 310.425
step: 49 	/ 50 	|	loss: -749.431
Finished training!
Time Series #8
step: 0 	/ 50 	-	loss: 316.292
step: 49 	/ 50 	|	loss: -639.886
Finished training!
Time Series #9
step: 0 	/ 50 	-	loss: 344.307
step: 49 	/ 50 	|	loss: -563.557
Finished training!
Time Series #10
s

# TimeFlow

In [5]:
# synthetic_path = synthetic_path = f"{dataset_path}synthetic/TimeFlow/"
# ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected.csv")
# seed_everything(0)

# for ts_index, time_series in enumerate(ts_iterator):
#     print(f"Time Series #{ts_index}")
    
#     (train_ts, *_), *_ = split_data(time_series, val_size=val_size, test_size=test_size)
#     train_ts = log_returns(train_ts)
#     train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)]

#     TimeFlow_model = TimeFlow(hidden=200, T=len(train_ts), n_flows=10, normalize=False)

#     TimeFlow_losses = TimeFlow_model.fit(train_ts.values.reshape(1, - 1), epochs=50, batch_size=128, 
#                             learning_rate=1e-3, display_step=50)

#     synth_data = TimeFlow_model.sample(n_samples // len(train_ts))
#     np.save(synthetic_path + f"selected{ts_index}.npy", synth_data)

#     del train_ts, synth_data, TimeFlow_model, TimeFlow_losses

12 ts time: ~30 min

# Similarity

In [17]:
from tqdm import tqdm
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.special import kl_div

from utils.data import get_hsm_dataset, get_solar_energy_dataset, get_fuel_prices_dataset, get_passengers_dataset, split_data, log_returns

In [18]:
results_dir = Path("results")

In [19]:
sj_div = lambda x, y: (kl_div(x, (x + y) / 2) + kl_div(y, (x + y) / 2)) / 2
min_max_norm = lambda x: (x - x.min()) / (x.max() - x.min())

In [26]:
start_dataset = 0
start_ts = 0

for ds_ind, (dataset_path, dataset_name) in enumerate(((Path("data/huge_stock_market_dataset/"), "hsm"),\
     (Path("data/solar_energy"), "se"), (Path("data/fuel_prices/"), "fp"),\
        (Path("data/air_passengers/"), "ap"))):
    if ds_ind < start_dataset: continue
    print(f"processing {dataset_name} dataset")
    for model in ("FourierFlow", "RealNVP"):
        synthetic_path = dataset_path / f"synthetic/{model}/"
        results = {"kl_div": [], "sj_div": []}
        if dataset_name == "hsm":
            ts_iterator = get_hsm_dataset(dataset_path, selected_files=f"{dataset_path}/selected100.csv")
        elif dataset_name == "se":
            ts_iterator = get_solar_energy_dataset(dataset_path, max_results=10)
        elif dataset_name == "fp":
            ts_iterator = get_fuel_prices_dataset(dataset_path)
        else:
            ts_iterator = get_passengers_dataset(dataset_path, max_results=99)

        for ts_index, time_series in tqdm(enumerate(ts_iterator)):
            
            train_ts = log_returns(time_series if dataset_name == "hsm" else time_series[:10_000] + 1e-9)
            train_ts = train_ts[:(len(train_ts) // 4 * 4 + 1 if len(train_ts) % 4 > 0 else len(train_ts) - 3)].values.flatten()
            train_ts = min_max_norm(train_ts)
            
            synth_tss = np.load(synthetic_path / f"selected{ts_index}.npy")
            kl_div_res = sj_div_res = 0
            for synth_ts in synth_tss:
                synth_ts = min_max_norm(synth_ts)
                # synth_ts = np.histogram(synth_ts, bins=np.arange(start=0, stop=1, step=1/100))[0]
                # train_ts = np.histogram(train_ts, bins=np.arange(start=0, stop=1, step=1/100))[0]
                res = kl_div(synth_ts, train_ts)
                kl_div_res += np.where(np.isinf(res), 0, res).mean()
                sj_div_res += sj_div(synth_ts, train_ts).mean()
            results["kl_div"].append(kl_div_res / len(synth_tss))
            results["sj_div"].append(sj_div_res / len(synth_tss))
        
        pd.DataFrame(results).to_csv(results_dir / f"synth_{dataset_name}_sim_{model}.csv", index=False)

processing hsm dataset


100it [00:02, 42.20it/s]
100it [00:02, 44.07it/s]


processing se dataset


10it [00:00, 14.20it/s]
10it [00:00, 15.17it/s]


processing fp dataset


8it [00:00, 33.01it/s]
8it [00:00, 30.16it/s]


processing ap dataset


99it [00:51,  1.93it/s]
99it [00:49,  1.99it/s]
