In [1]:
from datetime import datetime
import time
import gc
import os
import argparse
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tqdm
import tensorflow as tf
tf.keras.backend.set_floatx('float32')
import data_utils
import gan_utils
import gan

In [None]:
# Hyperparameter sweep: entry k of each list defines run k (the lists are
# paired element-wise, they are not crossed).
sinkhorn_eps_list = [1., 1.]
reg_penalty_list = [0.1, 1.]

assert len(sinkhorn_eps_list) == len(reg_penalty_list)


def _format_coef(value):
    """Format a coefficient for use in a run name.

    Whole numbers are rendered as plain integers ("1"); anything else is
    rendered with three significant digits ("0.1"). Plain ints are accepted
    as well as floats.
    """
    value = float(value)
    return f"{int(value):d}" if value.is_integer() else f"{value:.3g}"


# These options do not depend on the swept values, so the parser is built and
# evaluated once. parse_known_args() is used so that arguments this parser does
# not declare end up in `unknown` instead of raising.
parser = argparse.ArgumentParser(description='cot')
parser.add_argument('-t', '--test', type=str, default='cot',
                    choices=['cot'])
parser.add_argument('-gfs', '--g_filter_size', type=int, default=32)
parser.add_argument('-dfs', '--d_filter_size', type=int, default=32)
parser.add_argument('-Dy', '--Dy', type=int, default=10)
parser.add_argument('-Dz', '--z_dims_t', type=int, default=4)
parser.add_argument('-bn', '--bn', type=int, default=1,
                    help="batch norm")
args, unknown = parser.parse_known_args()

for i, (sinkhorn_eps, reg_penalty) in enumerate(zip(sinkhorn_eps_list, reg_penalty_list)):
    # sinkhorn_eps: entropy regularisation coefficient (swept above)
    # reg_penalty:  martingale regularisation penalty (swept above)

    # ---- training hyperparameters ----
    n_iters = 200
    batch_size = 32
    sinkhorn_l = 200 # number of sinkhorn iterations
    gen_lr = 1e-3    # reset on every run; the training loop decays it in place
    disc_lr = 1e-3

    # ---- model hyperparameters ----
    gen_type = 'lstmd'
    activation = 'tanh'
    nlstm = 1
    g_state_size = 64
    d_state_size = 64
    log_series = True

    # ---- data hyperparameters ----
    dname = 'SPX'
    seq_dim = 1 # dimension of the time series excluding time dimension
    Dx = 2 # dimension of the time series including time dimension
    time_steps = 250 # for the discriminator
    sample_len = 300 # for the generator
    hist_len = 50
    stride = 50
    seed = 42 # np.random.randint(0, 10000)
    dt = 1 / 252
    start_date = '1995-01-01'
    end_date = '2022-10-19'

    # ---- learning-rate schedule / logging cadence ----
    patience = 20
    factor = 0.5
    fig_freq = 10

    # Parameter snapshots that are written to the summary log below.
    training_params = {
        'n_iters': n_iters,
        'batch_size': batch_size,
        'sinkhorn_eps': sinkhorn_eps,
        'sinkhorn_l': sinkhorn_l,
        'reg_penalty': reg_penalty,
        'gen_lr': gen_lr,
        'disc_lr': disc_lr,
        'patience': patience,
        'factor': factor,
    }

    model_params = {
        'gen_type': gen_type,
        'activation': activation,
        # 'nlstm': nlstm,
        'g_state_size': g_state_size,
        'd_state_size': d_state_size,
        'log_series': log_series,
    }

    data_params = {
        'dname': dname,
        'dt': dt,
        'sample_len': sample_len,
        'hist_len': hist_len,
        'time_steps': time_steps,
        'stride': stride,
        'seed': seed,
        'Dx': Dx,
    }

    # Seed before any data object or network is created so every run starts
    # from the same random state.
    tf.random.set_seed(seed)
    np.random.seed(seed)
    start_time = time.time()
    test = args.test
    bn = bool(args.bn)
    g_output_activation = 'linear'

    df = pd.read_csv('./data/spx_20231229.csv', index_col=0, parse_dates=True)
    data_dist = data_utils.DFDataset(df, start_date, end_date, sample_len, batch_size, stride)
    dataset = dname
    n_layers = 1
    gen_optimiser = tf.keras.optimizers.legacy.Adam(gen_lr)
    dischm_optimiser = tf.keras.optimizers.legacy.Adam(disc_lr)
    disc_iters = 1
    scaling_coef = 1.0

    # Noise source for the generator:
    # (z1, z2, ..., zT) --> (y1, y2, ..., yT)
    # The window, seed and stride reuse the variables defined above instead of
    # repeating their literal values, so the two cannot drift apart.
    z_dims_t = args.z_dims_t
    y_dims = args.Dy
    dist_z = data_utils.GARCH(df, start_date=start_date, end_date=end_date, sample_len=sample_len,
                            p=20, o=0, q=0, mean_model='Zero', vol_model='GARCH', dist='gaussian',
                            seed=seed, stride=stride)

    # Create instances of generator, discriminator_h and
    # discriminator_m CONV VERSION
    g_filter_size = args.g_filter_size
    d_filter_size = args.d_filter_size
    disc_kernel_width = 5

    generator = gan.GenLSTMd(z_dims_t, seq_dim, sample_len, hist_len, hidden_size=g_state_size)

    discriminator_h = gan.ToyDiscriminator(
        batch_size, time_steps, z_dims_t, Dx, d_state_size, d_filter_size,
        kernel_size=disc_kernel_width, nlayer=2, nlstm=0, bn=bn)
    discriminator_m = gan.ToyDiscriminator(
        batch_size, time_steps, z_dims_t, Dx, d_state_size, d_filter_size,
        kernel_size=disc_kernel_width, nlayer=2, nlstm=0, bn=bn)

    # Run name, e.g. "SPX_Apr08-09-26SPX_e1r0.1s42".
    suffix = f"{dname[:3]}_e{_format_coef(sinkhorn_eps)}r{_format_coef(reg_penalty)}s{seed:d}"

    # Read the clock once so the month/day/hour/minute fields cannot straddle a
    # boundary. The previous format call also passed a seconds field that had no
    # placeholder and was therefore never part of the name; the resulting
    # directory name layout is kept as it was. %b is the documented spelling of
    # the abbreviated month name.
    run_timestamp = datetime.now().strftime("%b%d-%H-%M")
    saved_file = "{}_{}".format(dataset, run_timestamp) + suffix

    log_dir = f"./trained/{saved_file}/log"

    # Create directories for storing data and images later.
    os.makedirs(f"trained/{saved_file}/data", exist_ok=True)
    os.makedirs(f"trained/{saved_file}/images", exist_ok=True)

    # GAN train notes
    with open("./trained/{}/train_notes.txt".format(saved_file), 'w') as f:
        # Include any experiment notes here:
        f.write("Experiment notes: .... \n\n")
        f.write("MODEL_DATA: {}\nSEQ_LEN: {}\n".format(
            dataset,
            time_steps, ))
        f.write("STATE_SIZE: {}\nNUM_LAYERS: {}\nLAMBDA: {}\n".format(
            g_state_size,
            n_layers,
            reg_penalty))
        f.write("BATCH_SIZE: {}\nCRITIC_ITERS: {}\nGenerator LR: {}\nDiscriminator LR:{}\n".format(
            batch_size,
            disc_iters,
            gen_lr,
            disc_lr))
        f.write("SINKHORN EPS: {}\nSINKHORN L: {}\n\n".format(
            sinkhorn_eps,
            sinkhorn_l))

    train_writer = tf.summary.create_file_writer(logdir=log_dir)

    # Record the three parameter dictionaries as text summaries at step 0.
    with train_writer.as_default():
        tf.summary.text('training_params', data_utils.pretty_json(training_params), step=0)
        tf.summary.text('model_params', data_utils.pretty_json(model_params), step=0)
        tf.summary.text('data_params', data_utils.pretty_json(data_params), step=0)

    @tf.function
    def disc_training_step(real_data, real_data_p):
        """Apply one optimiser update to both discriminators (h and m).

        Two batches of generator noise are drawn, two fake batches are produced
        conditioned on `real_data` and `real_data_p`, and both discriminators
        are updated on ``-mixed_sinkhorn_loss + martingale_penalty``. Only the
        time steps from `hist_len` onwards enter the loss. Nothing is returned.

        Args:
            real_data: first batch of real series (also passed to the generator).
            real_data_p: second batch of real series (also passed to the generator).
        """
        # NOTE(review): this runs inside tf.function. If dist_z.sample draws
        # with NumPy rather than TensorFlow ops, the draw would happen once at
        # trace time and be reused on every call -- confirm in data_utils.
        hidden_z = dist_z.sample([batch_size, sample_len-1, z_dims_t])
        hidden_z_p = dist_z.sample([batch_size, sample_len-1, z_dims_t])

        # gradient() is called exactly once below, so a default (non-persistent)
        # tape is sufficient and releases its resources right after that call.
        with tf.GradientTape() as disc_tape:
            fake_data = generator.call(hidden_z, real_data)         # For SPX
            fake_data_p = generator.call(hidden_z_p, real_data_p)   # For SPX

            # NOTE: only hist_len ONWARDS is used for the loss computation
            h_fake = discriminator_h.call(fake_data[:,hist_len:,:]) # For SPX
            m_real = discriminator_m.call(real_data[:,hist_len:,:]) # For SPX
            m_fake = discriminator_m.call(fake_data[:,hist_len:,:]) # For SPX
            h_real_p = discriminator_h.call(real_data_p[:,hist_len:,:]) # For SPX
            h_fake_p = discriminator_h.call(fake_data_p[:,hist_len:,:]) # For SPX
            m_real_p = discriminator_m.call(real_data_p[:,hist_len:,:]) # For SPX
            loss1 = gan_utils.compute_mixed_sinkhorn_loss(
                real_data[:,hist_len:,:], fake_data[:,hist_len:,:], m_real, m_fake, h_fake, scaling_coef,
                sinkhorn_eps, sinkhorn_l, real_data_p[:,hist_len:,:], fake_data_p[:,hist_len:,:], m_real_p,
                h_real_p, h_fake_p)

            # Regularisation term computed from m evaluated on the real batch.
            pm1 = gan_utils.scale_invariante_martingale_regularization(
                m_real, reg_penalty, scaling_coef)
            # The discriminators ascend the Sinkhorn loss, hence the sign flip.
            disc_loss = - loss1 + pm1
        # update discriminator parameters
        disch_grads, discm_grads = disc_tape.gradient(
            disc_loss, [discriminator_h.trainable_variables, discriminator_m.trainable_variables])
        dischm_optimiser.apply_gradients(zip(disch_grads, discriminator_h.trainable_variables))
        dischm_optimiser.apply_gradients(zip(discm_grads, discriminator_m.trainable_variables))

    @tf.function
    def gen_training_step(real_data, real_data_p):
        """Apply one optimiser update to the generator and return its loss.

        The generator loss is the mixed Sinkhorn loss from
        gan_utils.compute_mixed_sinkhorn_loss, evaluated on the time steps from
        `hist_len` onwards of two real batches and two generated batches.

        Args:
            real_data: first batch of real series (also passed to the generator).
            real_data_p: second batch of real series (also passed to the generator).

        Returns:
            The loss value the generator was updated on.
        """
        def _after_history(series):
            # Keep only the time steps from hist_len onwards.
            return series[:,hist_len:,:]

        noise = dist_z.sample([batch_size, sample_len-1, z_dims_t])
        noise_p = dist_z.sample([batch_size, sample_len-1, z_dims_t])

        with tf.GradientTape() as tape:
            generated = generator.call(noise, real_data)            # For SPX
            generated_p = generator.call(noise_p, real_data_p)      # For SPX

            # Outputs of the h and m networks that feed the loss.
            h_on_fake = discriminator_h.call(_after_history(generated))
            m_on_real = discriminator_m.call(_after_history(real_data))
            m_on_fake = discriminator_m.call(_after_history(generated))
            h_on_real_p = discriminator_h.call(_after_history(real_data_p))
            h_on_fake_p = discriminator_h.call(_after_history(generated_p))
            m_on_real_p = discriminator_m.call(_after_history(real_data_p))

            gen_loss = gan_utils.compute_mixed_sinkhorn_loss(
                _after_history(real_data), _after_history(generated),
                m_on_real, m_on_fake, h_on_fake, scaling_coef,
                sinkhorn_eps, sinkhorn_l,
                _after_history(real_data_p), _after_history(generated_p),
                m_on_real_p, h_on_real_p, h_on_fake_p)

        # update generator parameters
        gen_variables = generator.trainable_variables
        gen_gradients = tape.gradient(gen_loss, gen_variables)
        gen_optimiser.apply_gradients(zip(gen_gradients, generator.trainable_variables))
        return gen_loss

    # Main training loop for this run: one discriminator step followed by one
    # generator step per iteration, with logging, a patience-based learning
    # rate decay and a weight checkpoint on every iteration.
    it_counts = 0
    with tqdm.trange(n_iters, ncols=150) as it:
        # [best loss seen so far, iteration at which it was seen]
        best_loss = [np.inf, 0]
        for _ in it:
            it_counts += 1
            # generate two batches of REAL data
            real_data = data_dist.batch(batch_size)
            real_data_p = data_dist.batch(batch_size)
            real_data = tf.cast(real_data, tf.float32)
            real_data_p = tf.cast(real_data_p, tf.float32)

            disc_training_step(real_data, real_data_p)
            loss = gen_training_step(real_data, real_data_p)
            loss_value = float(loss)
            it.set_postfix(loss=loss_value)

            with train_writer.as_default():
                tf.summary.scalar('Sinkhorn loss', loss, step=it_counts)
                train_writer.flush()

            if not np.isfinite(loss_value):
                print('Loss exploded')
                # Append to the existing notes file so the failure is recorded
                with open("./trained/{}/train_notes.txt".format(saved_file), 'a') as f:
                    f.write("\n Training failed! ")
                break

            # check if the loss is the best so far and reduce lr if no improvement beyond patience
            if loss_value < best_loss[0]:
                best_loss = [loss_value, it_counts]
            if it_counts - best_loss[1] > patience:
                gen_lr *= factor
                disc_lr *= factor
                gen_optimiser.lr.assign(gen_lr)
                dischm_optimiser.lr.assign(disc_lr)
                best_loss = [loss_value, it_counts] # restart the patience window from the current iteration
                print(f'Reducing gen_lr to {gen_lr} and disc_lr to {disc_lr} at iteration {it_counts}')

            # Draw a fresh batch from the generator for monitoring.
            z = dist_z.sample([batch_size, sample_len-1, z_dims_t])
            samples = generator.call(z, real_data, training=False) # For SPX

            # Channel 1 of the generated tensor is the series that gets plotted
            # and summarised.
            batch_series = np.asarray(samples[...,1])
            # When log_series is set the samples are treated as log values
            # (they are exponentiated for plotting); otherwise take logs first.
            log_paths = batch_series if log_series else np.log(batch_series)
            log_increments = np.diff(log_paths, axis=1)
            sample_mean = log_increments.mean() / dt
            sample_std = log_increments.std() / np.sqrt(dt)

            with train_writer.as_default():
                if it_counts % fig_freq == 0:
                    # Plot on a fresh figure only when an image is actually
                    # logged, and close it afterwards. Plotting on the implicit
                    # current figure every iteration stacked the lines of
                    # several iterations into one image and kept figures alive.
                    fig, ax = plt.subplots()
                    ax.plot(np.exp(batch_series.T) if log_series else batch_series.T)
                    tf.summary.image("Generated samples", data_utils.plot_to_image(fig), step=it_counts)
                    plt.close(fig)
                tf.summary.scalar('Stats/Sample_mean', sample_mean, step=it_counts)
                tf.summary.scalar('Stats/Sample_std', sample_std, step=it_counts)
            # save model to file
            generator.save_weights(f"./trained/{saved_file}/generator/")
            discriminator_h.save_weights(f"./trained/{saved_file}/discriminator_h/")
            discriminator_m.save_weights(f"./trained/{saved_file}/discriminator_m/")

    print("--- The entire training takes %s minutes ---" % ((time.time() - start_time) / 60.0))

Optimization terminated successfully    (Exit mode 0)
            Current function value: 5855.988337133396
            Iterations: 36
            Function evaluations: 836
            Gradient evaluations: 36
                        Zero Mean - ARCH Model Results                        
Dep. Variable:           gaussianized   R-squared:                       0.000
Mean Model:                 Zero Mean   Adj. R-squared:                  0.000
Vol Model:                       ARCH   Log-Likelihood:               -5855.99
Distribution:                  Normal   AIC:                           11754.0
Method:            Maximum Likelihood   BIC:                           11897.9
                                        No. Observations:                 6999
Date:                Mon, Apr 08 2024   Df Residuals:                     6999
Time:                        09:26:51   Df Model:                            0
                               Volatility Model                              
 

2024-04-08 09:26:51.977107: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-08 09:26:51.998155: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-08 09:26:51.998195: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-08 09:26:51.999155: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:0b:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-08 09:26:51.999191: I tensorflow/compile

Reducing gen_lr to 0.0005 and disc_lr to 0.0005 at iteration 100


 64%|█████████████████████████████████████████████████████████████████▏                                   | 129/200 [23:55<06:33,  5.54s/it, loss=2.1]

Reducing gen_lr to 0.00025 and disc_lr to 0.00025 at iteration 130


 76%|███████████████████████████████████████████████████████████████████████████▏                       | 152/200 [26:04<04:32,  5.68s/it, loss=0.521]

Reducing gen_lr to 0.000125 and disc_lr to 0.000125 at iteration 153


 92%|███████████████████████████████████████████████████████████████████████████████████████████▌       | 185/200 [29:07<01:24,  5.66s/it, loss=-2.58]

Reducing gen_lr to 6.25e-05 and disc_lr to 6.25e-05 at iteration 186


100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [30:26<00:00,  9.13s/it, loss=-0.288]


--- The entire training takes 30.474170088768005 minutes ---
Optimization terminated successfully    (Exit mode 0)
            Current function value: 5855.988337133396
            Iterations: 36
            Function evaluations: 836
            Gradient evaluations: 36
                        Zero Mean - ARCH Model Results                        
Dep. Variable:           gaussianized   R-squared:                       0.000
Mean Model:                 Zero Mean   Adj. R-squared:                  0.000
Vol Model:                       ARCH   Log-Likelihood:               -5855.99
Distribution:                  Normal   AIC:                           11754.0
Method:            Maximum Likelihood   BIC:                           11897.9
                                        No. Observations:                 6999
Date:                Mon, Apr 08 2024   Df Residuals:                     6999
Time:                        09:57:20   Df Model:                            0
                  

 50%|█████████████████████████████████████████████████▌                                                  | 99/200 [21:38<09:31,  5.66s/it, loss=-4.47]

Reducing gen_lr to 0.0005 and disc_lr to 0.0005 at iteration 100


 72%|███████████████████████████████████████████████████████████████████████▊                           | 145/200 [26:13<05:37,  6.14s/it, loss=-7.02]

Reducing gen_lr to 0.00025 and disc_lr to 0.00025 at iteration 146


 94%|█████████████████████████████████████████████████████████████████████████████████████████████      | 188/200 [30:44<01:19,  6.63s/it, loss=-7.26]

Reducing gen_lr to 0.000125 and disc_lr to 0.000125 at iteration 189


100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [31:54<00:00,  9.57s/it, loss=-5.97]

--- The entire training takes 31.93025059700012 minutes ---



