# train.py, the main function of this project

In [2]:
import itertools
import os
from os import path as pt

import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.model_selection import train_test_split

from hyperparameters import SIGCWGAN_CONFIGS
from lib import ALGOS
from lib.algos.base import BaseConfig
from lib.data import get_data
from lib.plot import savefig, create_summary
from lib.utils import pickle_it

from torch import nn
from typing import Tuple

In [3]:
def get_algo_config(dataset, data_params):
    """Look up the SigCWGAN hyperparameters for a dataset.

    The lookup key is the dataset name, extended with the dimension for
    'VAR' and with the underscore-joined asset names for 'STOCKS'. Any other
    dataset is looked up by its plain name.
    """
    if dataset == 'VAR':
        suffix = str(data_params['dim'])
    elif dataset == 'STOCKS':
        suffix = '_' + '_'.join(data_params['assets'])
    else:
        suffix = ''
    return SIGCWGAN_CONFIGS[dataset + suffix]


def set_seed(seed):
    """Seed NumPy's global RNG and PyTorch's default generator with `seed`."""
    np.random.seed(seed)
    torch.manual_seed(seed)


def get_algo(algo_id, base_config, dataset, data_params, x_real):
    """Instantiate the algorithm registered under `algo_id` in `ALGOS`.

    'SigCWGAN' additionally receives its dataset-specific configuration
    (see `get_algo_config`); every other algorithm is built from the real
    data and the base configuration only.
    """
    kwargs = dict(x_real=x_real, base_config=base_config)
    if algo_id == 'SigCWGAN':
        kwargs['config'] = get_algo_config(dataset, data_params)
    return ALGOS[algo_id](**kwargs)


def _asset_specs(asset_groups):
    """Return a generator of `(spec, data_params)` pairs, one per tuple of asset names."""
    return (('_'.join(assets), dict(assets=assets)) for assets in asset_groups)


def get_dataset_configuration(dataset):
    """Return the data configurations to run for `dataset`.

    Args:
        dataset: one of 'ECG', 'STOCKS', 'BINANCE', 'VAR', 'ARCH' or 'SINE'.

    Returns:
        An iterable of `(spec, data_params)` pairs. `spec` is a short string
        identifying the configuration and `data_params` is a dict of keyword
        arguments describing the data. For several datasets the iterable is a
        one-shot generator, so call this function again instead of iterating
        the same result twice.

    Raises:
        ValueError: if `dataset` is not one of the names listed above.
    """
    if dataset == 'ECG':
        return [('id=100', dict(filenames=['100']))]
    if dataset == 'STOCKS':
        return _asset_specs([('SPX',), ('SPX', 'DJI')])
    if dataset == 'BINANCE':
        # Further asset tuples such as ('BTC',) or ('ETH',) can be added to this list.
        return _asset_specs([('BTC', 'ETH')])
    if dataset == 'VAR':
        # (phi, sigma) pairs; dimension 1 only uses the first three of them.
        phi_sigma_short = [(0.2, 0.8), (0.5, 0.8), (0.8, 0.8)]
        phi_sigma_full = phi_sigma_short + [(0.8, 0.2), (0.8, 0.5)]
        combinations = itertools.chain(
            itertools.product([1], phi_sigma_short),
            itertools.product([2], phi_sigma_full),
            itertools.product([3], phi_sigma_full),
        )
        return (
            ('dim={}_phi={}_sigma={}'.format(dim, phi, sigma), dict(dim=dim, phi=phi, sigma=sigma))
            for dim, (phi, sigma) in combinations
        )
    if dataset == 'ARCH':
        return (('lag={}'.format(lag), dict(lag=lag)) for lag in [3])
    if dataset == 'SINE':
        return [('a', dict())]
    # ValueError is a subclass of Exception, so existing `except Exception` handlers still work.
    raise ValueError('%s not a valid data type.' % dataset)

Below is the core of `train.py`. Comments are made between the code to explain what each section does.

In [4]:
def _save_paths(x, name, p, experiment_directory, num_paths=250):
    """Pickle the tensor `x` and save a plot of randomly drawn paths from it.

    The tensor is pickled as '<name>.torch' in the parent directory of
    `experiment_directory`; the figure is written as '<name>.png' inside
    `experiment_directory`. Only the time steps from index `p` onwards are
    drawn, one colour per index of the last dimension.
    """
    pickle_it(x, pt.join(pt.dirname(experiment_directory), '%s.torch' % name))
    random_indices = torch.randint(0, x.shape[0], (num_paths,))
    for asset_i in range(x.shape[2]):
        # Copy the sampled paths to host memory first: matplotlib converts its
        # inputs to NumPy arrays, which fails for tensors on a CUDA device.
        paths = x[random_indices, p:, asset_i].detach().cpu()
        plt.plot(torch.transpose(paths, 0, 1).numpy(), 'C%s' % asset_i, alpha=0.1)
    plt.ylim((-0.2, 0.2))
    plt.savefig(os.path.join(experiment_directory, '%s.png' % name))
    plt.clf()


def run(algo_id, base_config, base_dir, dataset, spec, data_params=None):
    """Create the experiment directory, calibrate the algorithm and store the results.

    Args:
        algo_id: key of the algorithm in `ALGOS` (e.g. 'SigCWGAN').
        base_config: configuration object; its `seed`, `p`, `q` and `device`
            attributes are read here.
        base_dir: root directory for all results.
        dataset: dataset name passed to `get_data`.
        spec: short string identifying the data configuration; used as a
            directory name.
        data_params: extra keyword arguments for `get_data` (default: none).

    Side effects:
        Writes the summary figure, the training loss, the generator weights
        and the loss plot to `base_dir/dataset/spec/seed=<seed>/algo_id`, the
        path plots to the same directory, and the pickled real/train/test
        paths to its parent directory.
    """
    # `None` instead of `{}` avoids a mutable default shared between calls.
    data_params = {} if data_params is None else data_params

    print('Executing: %s, %s, %s' % (algo_id, dataset, spec))
    experiment_directory = pt.join(base_dir, dataset, spec, 'seed={}'.format(base_config.seed), algo_id)
    # exist_ok makes this safe when the directory is already there.
    os.makedirs(experiment_directory, exist_ok=True)

    # >>>> Set seed for exact reproducibility of the experiments <<<<
    set_seed(base_config.seed)

    # >>>> Initialise dataset and algo <<<<
    x_real = get_data(dataset, base_config.p, base_config.q, **data_params)
    x_real = x_real.to(base_config.device)

    # Train/test split (80/20). The test set is compared with data produced by
    # the generator that was trained on the training set.
    # NOTE(review): the split is a random permutation of the samples; if
    # `get_data` returns overlapping rolling windows, train and test samples
    # share data points -- confirm this is intended.
    size_train = int(x_real.shape[0] * 0.8)
    indices = np.random.permutation(x_real.shape[0])
    train_idx, test_idx = indices[:size_train], indices[size_train:]
    x_real_train, x_real_test = x_real[train_idx], x_real[test_idx]

    algo = get_algo(algo_id, base_config, dataset, data_params, x_real_train)

    # >>>> Train the algorithm <<<<
    algo.fit()

    # >>>> Training ends here <<<<

    # >>>> Create summary <<<<
    create_summary(dataset, base_config.device, algo.G, base_config.p, base_config.q, x_real_test, experiment_directory)
    savefig('summary.png', experiment_directory)

    # >>>> Save the real paths and plot them to see how different they are. <<<<
    # The order (all, test, train) is kept so the random draws stay the same.
    _save_paths(x_real, 'x_real', base_config.p, experiment_directory)
    _save_paths(x_real_test, 'x_real_test', base_config.p, experiment_directory)
    _save_paths(x_real_train, 'x_real_train', base_config.p, experiment_directory)

    # >>>> Save training loss and generator weights. <<<<
    pickle_it(algo.training_loss, pt.join(experiment_directory, 'training_loss.pkl'))
    pickle_it(algo.G.to('cpu').state_dict(), pt.join(experiment_directory, 'G_weights.torch'))

    # >>>> Log some results <<<<
    algo.plot_losses()
    savefig('losses', experiment_directory)


def main(args):
    """Train every requested algorithm on every requested dataset and seed.

    `args` must provide `use_cuda`, `device`, `datasets`, `algos`,
    `initial_seed`, `num_seeds`, `batch_size`, `hidden_dims`, `p`, `q`,
    `total_steps` and `base_dir`.
    """
    if not pt.exists('./data'):
        os.mkdir('./data')

    print('Start of training. CUDA: %s' % args.use_cuda)
    seeds = range(args.initial_seed, args.initial_seed + args.num_seeds)
    for dataset in args.datasets:
        for algo_id in args.algos:
            for seed in seeds:

                print(f"dataset={dataset} / algo={algo_id} / seed={seed}")

                # Fall back to the CPU when CUDA is not requested or not available.
                if args.use_cuda and torch.cuda.is_available():
                    device = 'cuda:{}'.format(args.device)
                else:
                    device = 'cpu'

                base_config = BaseConfig(
                    device=device,
                    seed=seed,
                    batch_size=args.batch_size,
                    hidden_dims=args.hidden_dims,
                    p=args.p,
                    q=args.q,
                    total_steps=args.total_steps,
                    mc_samples=1000,
                )
                set_seed(seed)

                # One run per data configuration of this dataset.
                configurations = get_dataset_configuration(dataset)
                for spec, data_params in configurations:
                    run(
                        algo_id=algo_id,
                        base_config=base_config,
                        base_dir=args.base_dir,
                        dataset=dataset,
                        spec=spec,
                        data_params=data_params,
                    )

To start training, run the cell below.

In [None]:
import argparse
class Args(argparse.Namespace):
    """Default training options, standing in for parsed command-line arguments."""

    # Root directory for all results.
    base_dir = './numerical_results'
    # Must be a real boolean: `main` prints it and uses its truthiness to decide
    # whether to request a CUDA device.
    use_cuda = True
    # Index used to build the 'cuda:<device>' device string.
    device = 0
    # Seeds initial_seed, ..., initial_seed + num_seeds - 1 are run.
    num_seeds = 1
    initial_seed = 0
    datasets = ['BINANCE']
    algos = ['CWGAN', 'SigCWGAN']
    batch_size = 200
    # p: length of the past window; q: length of the generated window.
    p = 24
    q = 6
    hidden_dims = 3 * (50,)
    total_steps = 100

# Instantiate the default options and launch training with them.
args = Args()
main(args)


# Notations

* $N$ is the total number of closing prices for each asset.
* $d$ is the number of total assets.
* $p$ is the length of past data that we are conditioning on.
* $q$ is the length of generated data.

# `get_data()`

First, call `get_binance_dataset()` which reads the csv of each Binance asset and concatenates all of their closing prices into a 3D-tensor whose size is $(1,N,d)$. Here, $N$ is the total number of closing prices for each asset and $d$ is the number of total assets.

Then, the $(1,N,d)$ tensor is passed to `zero_based_rolling_window()`. The output is a 3D tensor of size $( \, N-(p+q) \, , \, p+q \, , \, d \, )$, where $p$ is the length of past data that we are conditioning on and $q$ is the length of generated data. It is built by the following procedure:

1. Call each entry of the second dimension of the $(1,N,d)$ tensor $x_t$, so $t=0,1,\ldots,N-1$.
2. For $t=0,1,\ldots,N-(p+q)-1$ (the start of each window):
    1. First take the next $p+q$ $x_t$'s, which are $x_t,\ldots,x_{t+(p+q-1)}$.
    2. Compute $y_{s} := \dfrac{ (x_s-x_t) }{ x_t }$ for $s=t,t+1,\ldots,t+(p+q-1)$, which is the relative change of price to the price at the start of the window.
    3. Collect $y_t,y_{t+1},\ldots,y_{t+(p+q-1)}$ to form a $(1,p+q,d)$ tensor.
3. Collect all $(1,p+q,d)$ tensors to form a $(N-(p+q),p+q,d)$ tensor. Return this tensor.

# `algo.fit()`

## `GANs.py`

The class `GAN` whose base class is `BaseAlgo` is equipped with a `ResFNN` discriminator and a `SimpleGenerator` generator from `base.py`.

The `SimpleGenerator` has an `ArFNN` (autoregressive feedforward neural network) architecture and works as follows:

1. `input_dim` $= p \times d$, `output_dim` $= d$, `hidden_dims` $=(50,50,50)$, and `latent_dim` $= d$ are the inputs to initialize `SimpleGenerator`.

*Architecture of AR-FNN*

$$ (x,z) \in \mathbb{R}^{p \times d} \times \mathbb{R}^{1 \times d} = \mathbb{R}^{(p+1) \times d} \overset{A_1}\longrightarrow \mathbb{R}^{50} \overset{\phi_\alpha}\longrightarrow \mathbb{R}^{50} \overset{R_2}\longrightarrow \mathbb{R}^{50} \overset{R_3}\longrightarrow \mathbb{R}^{50} \overset{A_4}\longrightarrow \mathbb{R}^{d} $$

In `gans.py`, the generator `G` calls the `sample()` function, which iteratively generates the future path according to Algorithm 1 on Page 15. See the comments in the code below to understand how the input data is transformed.

In [None]:
class ArFNN(nn.Module):
    """Autoregressive feed-forward network that generates a path step by step.

    NOTE: `__init__` is left empty in this excerpt, while `forward` relies on
    `self.network` being set.
    """

    def __init__(self, input_dim: int, output_dim: int, hidden_dims: Tuple[int]):
        pass

    def forward(self, z, x_past):
        """Generate one new step per noise slice in `z`, feeding each step back.

        The example shapes in the comments use a batch of 200000, p=24, d=2.

        Args:
            z: noise; one slice `z[:, t:t + 1]` is consumed per generated step.
            x_past: the most recent past values used as conditioning input.

        Returns:
            All generated steps concatenated along dimension 1.
        """
        batch_size = x_past.shape[0]
        steps = []
        for t in range(z.shape[1]):
            # Build the input of layer A_1: the noise of step t followed by the
            # flattened past.
            noise_t = z[:, t:t + 1]                            # e.g. [200000, 1, d=2]
            history_flat = x_past.reshape(batch_size, 1, -1)   # e.g. [200000, 1, p*d=48]
            x_in = torch.cat([noise_t, history_flat], dim=-1)  # e.g. [200000, 1, (p+1)*d=50]

            # Run the network on this input; layer A_4 produces the new step.
            x_gen = self.network(x_in)                         # e.g. [200000, 1, d=2]

            # Slide the window: drop the oldest step and append the new one.
            x_past = torch.cat([x_past[:, 1:], x_gen], dim=1)  # e.g. [200000, p=24, d=2]
            steps.append(x_gen)
        return torch.cat(steps, dim=1)

class SimpleGenerator(ArFNN):
    """`ArFNN` whose noise input is drawn from a standard normal distribution."""

    def __init__(self, input_dim: int, output_dim: int, hidden_dims: Tuple[int], latent_dim: int):
        # The parent is built with an input dimension widened by `latent_dim`,
        # since the noise is concatenated with the past values in `forward`.
        super().__init__(input_dim + latent_dim, output_dim, hidden_dims)
        self.latent_dim = latent_dim

    def sample(self, steps, x_past):
        """Generate `steps` future steps conditioned on `x_past`.

        [Usage] generator.sample( q, x_past ) where x_past has length p.
        """
        batch_size = x_past.size(0)
        # One noise vector of size latent_dim per sample and per step, drawn on
        # the CPU and then moved to the device of `x_past`.
        noise = torch.randn(batch_size, steps, self.latent_dim)
        noise = noise.to(x_past.device)
        return self.forward(noise, x_past)

In every iteration of the loop in `ArFNN.forward()`, the `ResFNN` network is called. In our example, `hidden_dims` is $(50,50,50)$, so the for loop below creates three blocks: $A_1$ followed by the two residual blocks $R_2$ and $R_3$, with $R_3$ outputting a tensor in $\mathbb{R}^{50}$. Finally, one more `Linear` layer ($A_4$) is appended. This maps $\mathbb{R}^{50}$ to $\mathbb{R}^d$.

In [2]:
# Example dimensions, used as default arguments of `ResFNN` below.
p = 24  # length of the past window that is conditioned on
d = 2  # number of assets

class ResidualBlock(nn.Module):
    """Linear layer plus PReLU activation, flagged as residual when widths match.

    Deriving from `nn.Module` registers the parameters of `linear` and
    `activation` and lets the block be used as a sub-module of a network.

    NOTE(review): no `forward` is defined in this excerpt, so the residual
    flag is only recorded here, not applied.

    Args:
        input_dim: width of the input of the linear layer.
        output_dim: width of the output of the linear layer.
    """

    def __init__(self, input_dim: int, output_dim: int) -> None:
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.activation = nn.PReLU()
        # A residual (skip) connection is only flagged when input and output
        # have the same width.
        self.create_residual_connection = input_dim == output_dim

class ResFNN(nn.Module):
    def __init__(self, input_dim=(p+1)*d, output_dim=d, hidden_dims=(50,50,50), flatten: bool = False):
        blocks = list()
        input_dim_block = input_dim  # initially R^{ (p+1) * d }
        for hidden_dim in hidden_dims:
            blocks.append(ResidualBlock(input_dim_block, hidden_dim))  # blocks A_1, R_2, and R_3
            input_dim_block = hidden_dim  # becomes R^{ 50 }
        blocks.append(nn.Linear(input_dim_block, output_dim))