In [4]:
import numpy as np
import pandas as pd

from tqdm import tqdm
from pathlib import Path

import torch
# from torch.utils.tensorboard import SummaryWriter

from utils.data import get_hsm_dataset, get_solar_energy_dataset, get_fuel_prices_dataset, get_passengers_dataset, split_data, log_returns, DimUniversalStandardScaler
from utils.metrics import MAPE, WAPE, MAE
from utils.TTS_GAN import TTS_GAN_Generator, TTS_GAN_Discriminator, weights_init, train_TTS_GAN

In [2]:
# Locations of the four datasets and of the model directory, all relative to
# the notebook's working directory.
(
    hsm_dataset_path,
    solar_energy_dataset_path,
    fuel_prices_dataset_path,
    passengers_dataset_path,
    models_dir,
) = map(
    Path,
    (
        "data/huge_stock_market_dataset/",
        "data/solar_energy/",
        "data/fuel_prices/",
        "data/air_passengers/",
        "models/",
    ),
)

In [3]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
# `gpu` is kept as an alias of `device`.
device = gpu = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# Seed the NumPy and PyTorch generators so that training and sampling can be
# repeated after "Restart Kernel -> Run All".
random_seed = 42
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# NOTE: the training cells pass globals() to train_TTS_GAN, so these settings
# are deliberately plain module-level names. Which of them train_TTS_GAN
# actually reads is not visible here -- do not rename or remove any of them
# without checking utils.TTS_GAN.
lr = 2e-4  # learning rate handed to both AdamW optimizers
wd = 0
ctrl_lr = 3.5e-4
beta1 = 0.0
beta2 = 0.9
max_epoch = 20  # default number of epochs; dataset cells may override it
latent_dim = 128  # size of the noise vector fed to the generator
batch_size = gen_batch_size = dis_batch_size = 64
ema = 0.995
ema_kimg = 500
ema_warmup = 0
world_size = 0
rank = - 1
print_freq = 50
n_critic = 1
phi = 1
accumulated_times = g_accumulated_times = 1
loss = "standard"
seq_len = 150  # length of every training window and generated sequence

n_samples = 80  # number of synthetic sequences generated per trained TTS-GAN

cuda:0


In [6]:
# Train one TTS-GAN per selected stock-market series and save n_samples
# synthetic sequences for each of them.
ts_iterator = get_hsm_dataset(hsm_dataset_path, selected_files=hsm_dataset_path / "selected100.csv")
synthetic_path = hsm_dataset_path / "synthetic/TTS_GAN/"
# np.save does not create missing directories.
synthetic_path.mkdir(parents=True, exist_ok=True)
# Set explicitly (like the other dataset cells do) so that re-running this cell
# after them does not silently pick up their smaller epoch count.
max_epoch = 20

# Skip the first `start_point` series, e.g. to resume an interrupted run.
start_point = 0
for _ in range(start_point): next(ts_iterator)

for ts_index, time_series in enumerate(ts_iterator, start=start_point):
    print(f"Time Series #{ts_index}")

    # Raw values are used; no log-returns preprocessing is applied.
    train_ts = time_series.values

    # using sequences of seq_len to train model
    # NOTE(review): range(len - seq_len) leaves out the final window (the one
    # ending at the last observation) -- confirm this is intended.
    train_ts = np.array([train_ts[i: i + seq_len] for i in range(len(train_ts) - seq_len)])

    scaler = DimUniversalStandardScaler()
    train_ts = scaler.fit_transform(train_ts)

    train_dl = torch.utils.data.DataLoader(torch.from_numpy(train_ts.reshape(- 1, 1, 1, seq_len)).to(device), batch_size=batch_size, shuffle=True)

    TTS_GAN_gen = TTS_GAN_Generator(seq_len=seq_len, channels=1, latent_dim=latent_dim, ).to(device)
    TTS_GAN_dis = TTS_GAN_Discriminator(seq_length=seq_len, in_channels=1).to(device)

    gen_optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, TTS_GAN_gen.parameters()), lr)
    dis_optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, TTS_GAN_dis.parameters()), lr)

    # train_TTS_GAN receives the notebook globals as its configuration.
    for epoch in range(max_epoch):
        losses = train_TTS_GAN(globals(), TTS_GAN_gen, TTS_GAN_dis, gen_optimizer, dis_optimizer, train_dl, epoch)
    # Only the losses returned for the last epoch are reported.
    tqdm.write(f"generator loss: {losses[0]: 0.4f} discriminator loss: {losses[1]: 0.4f}")
    del dis_optimizer, gen_optimizer, TTS_GAN_dis, train_dl
    torch.cuda.empty_cache()

    # Switch to inference mode so that train-only layers (e.g. dropout, if the
    # generator has any) do not perturb the generated samples.
    TTS_GAN_gen.eval()
    synth_data = []
    with torch.no_grad():
        for _ in range(n_samples):
            # Built directly on `device`, so this also works on the CPU fallback
            # (torch.cuda.FloatTensor(...).cuda() requires a GPU).
            z = torch.as_tensor(np.random.normal(0, 1, (1, latent_dim)), dtype=torch.float32, device=device)
            synth_data.append(TTS_GAN_gen(z).cpu().numpy())
    # np.vstack is the non-deprecated name of np.row_stack.
    np.save(synthetic_path / f"selected{ts_index}.npy", scaler.inverse_transform(np.vstack(synth_data)))

    del TTS_GAN_gen, synth_data
    torch.cuda.empty_cache()

Time Series #0
generator loss: -0.6101 discriminator loss:  1.3817
Time Series #1
generator loss: -0.4423 discriminator loss:  0.8679
Time Series #2
generator loss: -0.5821 discriminator loss:  1.5387
Time Series #3
generator loss: -0.5131 discriminator loss:  1.1995
Time Series #4
generator loss: -0.5394 discriminator loss:  1.2372
Time Series #5
generator loss: -0.4120 discriminator loss:  1.1106
Time Series #6
generator loss: -0.6126 discriminator loss:  1.7059
Time Series #7
generator loss: -0.3423 discriminator loss:  0.8353
Time Series #8
generator loss: -0.6023 discriminator loss:  1.3908
Time Series #9
generator loss: -0.4496 discriminator loss:  1.2067
Time Series #10
generator loss: -0.5178 discriminator loss:  1.3138
Time Series #11
generator loss: -0.4435 discriminator loss:  1.0226
Time Series #12
generator loss: -0.4819 discriminator loss:  1.1233
Time Series #13
generator loss: -0.4872 discriminator loss:  1.1253
Time Series #14
generator loss: -0.4918 discriminator loss

Time: ~53 min

In [7]:
# Train one TTS-GAN per solar-energy series (at most 10 are requested) and
# save n_samples synthetic sequences for each of them.
ts_iterator = get_solar_energy_dataset(solar_energy_dataset_path, max_results=10)
synthetic_path = solar_energy_dataset_path / "synthetic/TTS_GAN/"
# np.save does not create missing directories.
synthetic_path.mkdir(parents=True, exist_ok=True)
max_epoch = 4

# Skip the first `start_point` series, e.g. to resume an interrupted run.
start_point = 0
for _ in range(start_point): next(ts_iterator)

for ts_index, time_series in enumerate(ts_iterator, start=start_point):
    print(f"Time Series #{ts_index}")

    # Raw values are used; no log-returns preprocessing is applied.
    train_ts = time_series.values

    # using sequences of seq_len to train model
    # NOTE(review): range(len - seq_len) leaves out the final window (the one
    # ending at the last observation) -- confirm this is intended.
    train_ts = np.array([train_ts[i: i + seq_len] for i in range(len(train_ts) - seq_len)])

    scaler = DimUniversalStandardScaler()
    train_ts = scaler.fit_transform(train_ts)
    train_dl = torch.utils.data.DataLoader(torch.from_numpy(train_ts.reshape(- 1, 1, 1, seq_len)).to(device), batch_size=batch_size, shuffle=True)

    TTS_GAN_gen = TTS_GAN_Generator(seq_len=seq_len, channels=1, latent_dim=latent_dim, ).to(device)
    TTS_GAN_dis = TTS_GAN_Discriminator(seq_length=seq_len, in_channels=1).to(device)

    gen_optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, TTS_GAN_gen.parameters()), lr)
    dis_optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, TTS_GAN_dis.parameters()), lr)

    # train_TTS_GAN receives the notebook globals as its configuration.
    for epoch in range(max_epoch):
        losses = train_TTS_GAN(globals(), TTS_GAN_gen, TTS_GAN_dis, gen_optimizer, dis_optimizer, train_dl, epoch)
    # Only the losses returned for the last epoch are reported.
    tqdm.write(f"generator loss: {losses[0]: 0.4f} discriminator loss: {losses[1]: 0.4f}")
    del dis_optimizer, gen_optimizer, TTS_GAN_dis, train_dl
    torch.cuda.empty_cache()

    # Switch to inference mode so that train-only layers (e.g. dropout, if the
    # generator has any) do not perturb the generated samples.
    TTS_GAN_gen.eval()
    samples_to_gen = n_samples
    synth_data = []
    with torch.no_grad():
        for _ in range(samples_to_gen):
            # Built directly on `device`, so this also works on the CPU fallback
            # (torch.cuda.FloatTensor(...).cuda() requires a GPU).
            z = torch.as_tensor(np.random.normal(0, 1, (1, latent_dim)), dtype=torch.float32, device=device)
            synth_data.append(TTS_GAN_gen(z).cpu().numpy())
    # np.vstack is the non-deprecated name of np.row_stack.
    np.save(synthetic_path / f"selected{ts_index}.npy", scaler.inverse_transform(np.vstack(synth_data)))

    del TTS_GAN_gen, synth_data
    torch.cuda.empty_cache()

Time Series #0
generator loss: -0.4282 discriminator loss:  0.9997
Time Series #1
generator loss: -0.4414 discriminator loss:  1.1532
Time Series #2
generator loss: -0.3949 discriminator loss:  1.1185
Time Series #3
generator loss: -0.4401 discriminator loss:  0.9961
Time Series #4
generator loss: -0.4168 discriminator loss:  1.0035
Time Series #5
generator loss: -0.4403 discriminator loss:  1.0953
Time Series #6
generator loss: -0.3175 discriminator loss:  0.7822
Time Series #7
generator loss: -0.4146 discriminator loss:  1.0068
Time Series #8
generator loss: -0.4477 discriminator loss:  1.0524
Time Series #9
generator loss: -0.4271 discriminator loss:  0.9765


Time: 79 min

In [8]:
# Train one TTS-GAN per fuel-price series and save n_samples synthetic
# sequences for each of them.
ts_iterator = get_fuel_prices_dataset(fuel_prices_dataset_path)
synthetic_path = fuel_prices_dataset_path / "synthetic/TTS_GAN/"
# np.save does not create missing directories.
synthetic_path.mkdir(parents=True, exist_ok=True)
max_epoch = 10

# Skip the first `start_point` series, e.g. to resume an interrupted run.
start_point = 0
for _ in range(start_point): next(ts_iterator)

for ts_index, time_series in enumerate(ts_iterator, start=start_point):
    print(f"Time Series #{ts_index}")

    # Raw values are used; no log-returns preprocessing is applied.
    train_ts = time_series.values

    # using sequences of seq_len to train model
    # NOTE(review): range(len - seq_len) leaves out the final window (the one
    # ending at the last observation) -- confirm this is intended.
    train_ts = np.array([train_ts[i: i + seq_len] for i in range(len(train_ts) - seq_len)])

    scaler = DimUniversalStandardScaler()
    train_ts = scaler.fit_transform(train_ts)
    train_dl = torch.utils.data.DataLoader(torch.from_numpy(train_ts.reshape(- 1, 1, 1, seq_len)).to(device), batch_size=batch_size, shuffle=True)

    TTS_GAN_gen = TTS_GAN_Generator(seq_len=seq_len, channels=1, latent_dim=latent_dim, ).to(device)
    TTS_GAN_dis = TTS_GAN_Discriminator(seq_length=seq_len, in_channels=1).to(device)

    gen_optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, TTS_GAN_gen.parameters()), lr)
    dis_optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, TTS_GAN_dis.parameters()), lr)

    # train_TTS_GAN receives the notebook globals as its configuration.
    for epoch in range(max_epoch):
        losses = train_TTS_GAN(globals(), TTS_GAN_gen, TTS_GAN_dis, gen_optimizer, dis_optimizer, train_dl, epoch)
    # Only the losses returned for the last epoch are reported.
    tqdm.write(f"generator loss: {losses[0]: 0.4f} discriminator loss: {losses[1]: 0.4f}")
    del dis_optimizer, gen_optimizer, TTS_GAN_dis, train_dl
    torch.cuda.empty_cache()

    # Switch to inference mode so that train-only layers (e.g. dropout, if the
    # generator has any) do not perturb the generated samples.
    TTS_GAN_gen.eval()
    samples_to_gen = n_samples
    synth_data = []
    with torch.no_grad():
        for _ in range(samples_to_gen):
            # Built directly on `device`, so this also works on the CPU fallback
            # (torch.cuda.FloatTensor(...).cuda() requires a GPU).
            z = torch.as_tensor(np.random.normal(0, 1, (1, latent_dim)), dtype=torch.float32, device=device)
            synth_data.append(TTS_GAN_gen(z).cpu().numpy())
    # np.vstack is the non-deprecated name of np.row_stack.
    np.save(synthetic_path / f"selected{ts_index}.npy", scaler.inverse_transform(np.vstack(synth_data)))

    del TTS_GAN_gen, synth_data
    torch.cuda.empty_cache()

Time Series #0
generator loss: -0.5187 discriminator loss:  1.2032
Time Series #1
generator loss: -0.4393 discriminator loss:  1.0658
Time Series #2
generator loss: -0.4639 discriminator loss:  1.0994
Time Series #3
generator loss: -0.5546 discriminator loss:  1.4700
Time Series #4
generator loss: -0.3544 discriminator loss:  0.8858
Time Series #5
generator loss: -0.4610 discriminator loss:  1.1516
Time Series #6
generator loss: -0.4471 discriminator loss:  0.9922
Time Series #7
generator loss: -0.4453 discriminator loss:  0.9839


Time: 72 sec

In [10]:
# Train one TTS-GAN per air-passengers series and save n_samples synthetic
# sequences for each of them.
ts_iterator = get_passengers_dataset(passengers_dataset_path)
synthetic_path = passengers_dataset_path / "synthetic/TTS_GAN/"
# np.save does not create missing directories.
synthetic_path.mkdir(parents=True, exist_ok=True)
max_epoch = 10

# Skip the first `start_point` series, e.g. to resume an interrupted run.
start_point = 0
for _ in range(start_point): next(ts_iterator)

for ts_index, time_series in enumerate(ts_iterator, start=start_point):
    print(f"Time Series #{ts_index}")

    # Raw values are used; no log-returns preprocessing is applied.
    train_ts = time_series.values

    # using sequences of seq_len to train model
    # NOTE(review): range(len - seq_len) leaves out the final window (the one
    # ending at the last observation) -- confirm this is intended.
    train_ts = np.array([train_ts[i: i + seq_len] for i in range(len(train_ts) - seq_len)])

    scaler = DimUniversalStandardScaler()
    train_ts = scaler.fit_transform(train_ts)
    train_dl = torch.utils.data.DataLoader(torch.from_numpy(train_ts.reshape(- 1, 1, 1, seq_len)).to(device), batch_size=batch_size, shuffle=True)

    TTS_GAN_gen = TTS_GAN_Generator(seq_len=seq_len, channels=1, latent_dim=latent_dim, ).to(device)
    TTS_GAN_dis = TTS_GAN_Discriminator(seq_length=seq_len, in_channels=1).to(device)

    gen_optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, TTS_GAN_gen.parameters()), lr)
    dis_optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, TTS_GAN_dis.parameters()), lr)

    # train_TTS_GAN receives the notebook globals as its configuration.
    for epoch in range(max_epoch):
        losses = train_TTS_GAN(globals(), TTS_GAN_gen, TTS_GAN_dis, gen_optimizer, dis_optimizer, train_dl, epoch)
    # Only the losses returned for the last epoch are reported.
    tqdm.write(f"generator loss: {losses[0]: 0.4f} discriminator loss: {losses[1]: 0.4f}")
    del dis_optimizer, gen_optimizer, TTS_GAN_dis, train_dl
    torch.cuda.empty_cache()

    # Switch to inference mode so that train-only layers (e.g. dropout, if the
    # generator has any) do not perturb the generated samples.
    TTS_GAN_gen.eval()
    samples_to_gen = n_samples
    synth_data = []
    with torch.no_grad():
        for _ in range(samples_to_gen):
            # Built directly on `device`, so this also works on the CPU fallback
            # (torch.cuda.FloatTensor(...).cuda() requires a GPU).
            z = torch.as_tensor(np.random.normal(0, 1, (1, latent_dim)), dtype=torch.float32, device=device)
            synth_data.append(TTS_GAN_gen(z).cpu().numpy())
    # np.vstack is the non-deprecated name of np.row_stack.
    np.save(synthetic_path / f"selected{ts_index}.npy", scaler.inverse_transform(np.vstack(synth_data)))

    del TTS_GAN_gen, synth_data
    torch.cuda.empty_cache()

Time Series #0
generator loss: -0.5580 discriminator loss:  1.4303
Time Series #1
generator loss: -0.4787 discriminator loss:  1.1723
Time Series #2
generator loss: -0.5233 discriminator loss:  1.3867
Time Series #3
generator loss: -0.5166 discriminator loss:  1.3812
Time Series #4
generator loss: -0.5367 discriminator loss:  1.3690
Time Series #5
generator loss: -0.4585 discriminator loss:  1.3159
Time Series #6
generator loss: -0.4619 discriminator loss:  1.3334
Time Series #7
generator loss: -0.4988 discriminator loss:  1.2971
Time Series #8
generator loss: -0.5702 discriminator loss:  1.4272
Time Series #9
generator loss: -0.5297 discriminator loss:  1.3865
Time Series #10
generator loss: -0.5642 discriminator loss:  1.3835
Time Series #11
generator loss: -0.4673 discriminator loss:  1.1893
Time Series #12
generator loss: -0.5828 discriminator loss:  1.3618
Time Series #13
generator loss: -0.5890 discriminator loss:  1.4060
Time Series #14
generator loss: -0.4848 discriminator loss

# Similarity

In [11]:
from tqdm import tqdm
from pathlib import Path

import numpy as np
import pandas as pd

from utils.data import get_hsm_dataset, get_solar_energy_dataset, get_fuel_prices_dataset, get_passengers_dataset, split_data, log_returns
from utils.synth_eval import eval_sim

In [12]:
# Dataset locations plus the results directory used by the similarity
# evaluation, all relative to the notebook's working directory.
(
    hsm_dataset_path,
    solar_energy_dataset_path,
    fuel_prices_dataset_path,
    passengers_dataset_path,
    results_dir,
) = map(
    Path,
    (
        "data/huge_stock_market_dataset/",
        "data/solar_energy/",
        "data/fuel_prices/",
        "data/air_passengers/",
        "results",
    ),
)

# Window length; same value as in the training configuration above.
seq_len = 150

In [13]:
# Evaluate the TTS_GAN synthetic data of all four datasets. The i-th short
# name is paired with the i-th dataset path; save=True and results_dir are
# forwarded to eval_sim. The returned value is the cell's displayed output.
eval_sim(
    ("hsm", "se", "fp", "ap"),
    (
        hsm_dataset_path,
        solar_energy_dataset_path,
        fuel_prices_dataset_path,
        passengers_dataset_path,
    ),
    "TTS_GAN",
    save=True,
    results_dir=results_dir,
)

processing hsm dataset


100it [00:32,  3.07it/s]


processing se dataset


  kl_div_res.append(kl_div(synth_ts, train_ts[i: i + len(synth_ts)]).mean())
  ret = ret.dtype.type(ret / rcount)
  kl_div_res.append(kl_div(synth_ts, train_ts[i: i + len(synth_ts)]).mean())
  ret = ret.dtype.type(ret / rcount)
  kl_div_res.append(kl_div(synth_ts, train_ts[i: i + len(synth_ts)]).mean())
  ret = ret.dtype.type(ret / rcount)
  kl_div_res.append(kl_div(synth_ts, train_ts[i: i + len(synth_ts)]).mean())
  ret = ret.dtype.type(ret / rcount)
  kl_div_res.append(kl_div(synth_ts, train_ts[i: i + len(synth_ts)]).mean())
  ret = ret.dtype.type(ret / rcount)
  kl_div_res.append(kl_div(synth_ts, train_ts[i: i + len(synth_ts)]).mean())
  ret = ret.dtype.type(ret / rcount)
  kl_div_res.append(kl_div(synth_ts, train_ts[i: i + len(synth_ts)]).mean())
  ret = ret.dtype.type(ret / rcount)
  kl_div_res.append(kl_div(synth_ts, train_ts[i: i + len(synth_ts)]).mean())
  ret = ret.dtype.type(ret / rcount)
  kl_div_res.append(kl_div(synth_ts, train_ts[i: i + len(synth_ts)]).mean())
  ret = ret

processing fp dataset


8it [00:01,  5.34it/s]


processing ap dataset


50it [00:32,  1.55it/s]


defaultdict(dict,
            {'hsm': {'kl_div': 11.69661606438711,
              'kstest_pval': 0.0028594020852601536},
             'se': {'kl_div': nan, 'kstest_pval': 2.99821208283929e-06},
             'fp': {'kl_div': 13.111345020163736,
              'kstest_pval': 0.008089773849450373},
             'ap': {'kl_div': 2286.7915978449946,
              'kstest_pval': 0.02092120631954569}})