In [4]:
%load_ext autoreload
%autoreload 2
import argparse
import numpy as np
import pandas as pd
import tensorflow as tf

import data_utils
import gan_utils
import gan

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
# --- Training hyper-parameters -------------------------------------------
n_iters = 100
batch_size = 64
sinkhorn_eps = 100  # entropy regularisation coefficient (will take log10 of this number and round to int)
sinkhorn_l = 100  # number of sinkhorn iterations
reg_penalty = 100  # martingale regularisation penalty (will take log10 of this number and round to int)
lr = 1e-3

# --- Model configuration --------------------------------------------------
gen_type = 'lstmpdt'
activation = 'tanh'
nlstm = 1
g_state_size = 64
d_state_size = 64
log_series = True

# --- Data configuration ---------------------------------------------------
dname = 'SPX'
# `dt` is read when `data_params` is built and by the GBM / OU / Heston
# branches of the dataset selection, so it has to exist on a fresh kernel
# (Restart & Run All); previously it was only available as leftover state.
# NOTE(review): 1 / 252 (one trading day expressed in years) is an assumed
# value - the original definition is not part of this notebook; confirm it
# before relying on the synthetic datasets.
dt = 1 / 252
time_steps = 60
sample_len = 300
stride = 50
seed = 42 # np.random.randint(0, 10000)
Dx = 1 # dimension of the time series

In [14]:
# Collect the notebook-level settings into three plain dictionaries, grouped
# by concern. Every value is read from a same-named variable that must already
# exist in the kernel namespace (including `dt`).

# Optimisation / loss settings.
training_params = dict(
    n_iters=n_iters,
    batch_size=batch_size,
    sinkhorn_eps=sinkhorn_eps,
    sinkhorn_l=sinkhorn_l,
    reg_penalty=reg_penalty,
    lr=lr,
)

# Generator / discriminator architecture settings.
model_params = dict(
    gen_type=gen_type,
    activation=activation,
    nlstm=nlstm,
    g_state_size=g_state_size,
    d_state_size=d_state_size,
    log_series=log_series,
)

# Dataset identification and shape settings.
data_params = dict(
    dname=dname,
    dt=dt,
    time_steps=time_steps,
    seed=seed,
    Dx=Dx,
)

In [15]:
# Command-line style options that are still read through argparse.
# Settings that are assigned as plain notebook variables instead (dname, seed,
# g/d state size, reg_penalty, time_steps, sinkhorn_eps, sinkhorn_l, Dx,
# generator type, batch_size, nlstm, lr) are intentionally not registered here.
# Each entry is (option strings, keyword arguments for `add_argument`).
_CLI_OPTIONS = (
    (('-t', '--test'), dict(type=str, default='cot', choices=['cot'])),
    (('-gfs', '--g_filter_size'), dict(type=int, default=32)),
    (('-dfs', '--d_filter_size'), dict(type=int, default=32)),
    (('-Dy', '--Dy'), dict(type=int, default=10)),
    (('-Dz', '--z_dims_t'), dict(type=int, default=1)),
    (('-bn', '--bn'), dict(type=int, default=1, help="batch norm")),
)

parser = argparse.ArgumentParser(description='cot')
for _flags, _spec in _CLI_OPTIONS:
    parser.add_argument(*_flags, **_spec)

# `parse_known_args` returns unrecognised arguments in `unknown` instead of
# exiting, so extra entries in `sys.argv` (for example the ones a notebook
# kernel is launched with) do not abort the cell.
args, unknown = parser.parse_known_args()

In [16]:
# Load the SPX series from CSV. The first column is used as the index and is
# parsed as dates; the recorded output shows a `Date` index and one `spx` column.
df = pd.read_csv(
    './data/spx_20231229.csv',
    parse_dates=True,
    index_col=0,
)
df

Unnamed: 0_level_0,spx
Date,Unnamed: 1_level_1
1927-12-30,17.660000
1928-01-03,17.760000
1928-01-04,17.719999
1928-01-05,17.549999
1928-01-06,17.660000
...,...
2023-12-21,4746.750000
2023-12-22,4754.629883
2023-12-26,4774.750000
2023-12-27,4781.580078


In [22]:
# Remaining settings taken from the parsed arguments.
test = args.test
bn = bool(args.bn)  # the -bn option is an int flag; convert it to a real boolean
g_output_activation = 'linear'

# Build the data source selected by `dname`. Only the chosen branch is
# evaluated, so `dt` is needed solely for the GBM / OU / Heston branches and
# `df`, `sample_len`, `stride` solely for the SPX branch.
if dname == 'AROne':
    data_dist = data_utils.AROne(
        Dx, time_steps, np.linspace(0.1, 0.9, Dx), 0.5)
elif dname == 'eeg':
    data_dist = data_utils.EEGData(
        Dx, time_steps, batch_size, n_iters, seed=seed)
elif dname == 'SineImage':
    data_dist = data_utils.SineImage(
        length=time_steps, Dx=Dx, rand_std=0.1)
elif dname == 'GBM':
    data_dist = data_utils.GBM(mu=0.2, sigma=0.5, dt=dt, length=time_steps, batch_size=batch_size, n_paths=batch_size*100,
                               log_series=log_series, initial_value=1.0, time_dim=False, seed=seed)
elif dname == 'OU':
    data_dist = data_utils.OU(kappa=10., theta=1., sigma=0.5, dt=dt, length=time_steps, batch_size=batch_size, n_paths=batch_size*100,
                              log_series=log_series, initial_value=1.0, time_dim=False, seed=seed)
elif dname == 'Heston':
    data_dist = data_utils.Heston(mu=0.2, v0=0.25, kappa=1., theta=0.16, rho=-0.7, sigma=0.2, dt=dt, length=time_steps, batch_size=batch_size, n_paths=batch_size*100,
                                  log_series=log_series, initial_value=1.0, time_dim=False, seed=seed)
elif dname == 'SPX':
    # Windows are drawn from the loaded `df` between the two given dates.
    data_dist = data_utils.DFDataset(df, '1995-01-01', '2022-10-19', sample_len, batch_size, stride)
else:
    # Fail immediately on an unknown dataset name. Without `raise` the
    # exception object was created and discarded, and the cell only failed
    # later with a NameError on `data_dist`.
    raise ValueError('Data does not exist.')

dataset = dname

# Draw two batches through two separate `batch` calls and cast both to
# float32 tensors.
# NOTE(review): whether the two calls return different samples depends on
# `data_utils` - confirm there.
real_data = data_dist.batch(batch_size)
real_data_p = data_dist.batch(batch_size)
real_data = tf.cast(real_data, tf.float32)
real_data_p = tf.cast(real_data_p, tf.float32)

In [23]:
# Check the layout of the sampled batch; the recorded output is (64, 300, 2),
# which matches batch_size = 64 and sample_len = 300.
# NOTE(review): the last axis has 2 channels although Dx = 1 - presumably a
# time channel plus the series value; confirm in data_utils.DFDataset.
real_data.shape

TensorShape([64, 300, 2])

In [24]:
# Display the first sample of the batch (a float32 tensor of shape (300, 2))
# to eyeball the values in both channels.
real_data[0]

<tf.Tensor: shape=(300, 2), dtype=float32, numpy=
array([[ 0.00000000e+00,  0.00000000e+00],
       [ 2.73895264e-03, -7.94621557e-03],
       [ 5.47981262e-03,  2.66764255e-04],
       [ 8.21876526e-03, -2.39438712e-04],
       [ 1.09596252e-02, -2.65283193e-02],
       [ 1.91783905e-02, -1.56052075e-02],
       [ 2.19173431e-02, -1.88135281e-02],
       [ 2.46582031e-02, -1.22893769e-02],
       [ 2.73971558e-02,  3.18105071e-04],
       [ 3.01361084e-02,  9.60685837e-04],
       [ 4.10957336e-02, -5.96232340e-03],
       [ 4.38346863e-02,  4.82140156e-03],
       [ 4.65755463e-02,  1.77473146e-02],
       [ 4.93144989e-02,  1.86575055e-02],
       [ 5.75332642e-02,  1.85634904e-02],
       [ 6.02741241e-02,  1.88857429e-02],
       [ 6.30130768e-02,  2.60894150e-02],
       [ 6.57539368e-02,  2.89644320e-02],
       [ 6.84928894e-02,  2.82397550e-02],
       [ 7.67116547e-02,  2.50992421e-02],
       [ 7.94525146e-02,  2.76776683e-02],
       [ 8.21914673e-02,  2.80203000e-02],
    