Runs GAN model for UK data

In [1]:
# Library
import logging
from pathlib import Path

import pandas as pd
import tensorflow as tf
from common import configpath, load_data, projectpath, split_data

from ap import create_gan, sharpe_loss, train_gan

# Configuration: log destination and the TensorFlow seed used for this run.
LOG_FILE = "logs/training_UK.log"
TF_SEED = 20220102

# logging
# When basicConfig installs the file handler it opens LOG_FILE immediately and
# raises FileNotFoundError if the parent directory does not exist (e.g. on a
# fresh checkout), so make sure the directory is there first.
Path(LOG_FILE).parent.mkdir(parents=True, exist_ok=True)
logging.basicConfig(
    format='%(asctime)s %(message)s',
    filename=LOG_FILE, level=logging.DEBUG)

# set seed for TensorFlow
tf.random.set_seed(TF_SEED)

2022-03-23 21:47:16.507139: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
"""Load data"""
# `select` is passed to load_data and reused in the exported file name;
# `with_macro` controls whether the macro data is loaded and fed to the model.
select = "factor"
with_macro = False

data = load_data(select, load_macro=with_macro)
firm = data["firm"]
# Later cells reference `macro` unconditionally, so it is None when unused.
macro = data["macro"] if with_macro else None

# Print the frame summary, then show the first rows as the cell output.
firm.info()
firm.head()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 200872 entries, ('3IN', Timestamp('1998-03-31 00:00:00')) to ('ZYT', Timestamp('2017-12-31 00:00:00'))
Data columns (total 9 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   excess_returns  200872 non-null  float64
 1   hml             200872 non-null  float64
 2   r12_7           200872 non-null  float64
 3   r2_1            200872 non-null  float64
 4   rf              200872 non-null  float64
 5   rm              200872 non-null  float64
 6   rmrf            200872 non-null  float64
 7   smb             200872 non-null  float64
 8   umd             200872 non-null  float64
dtypes: float64(9)
memory usage: 14.6+ MB


In [3]:
# Preview the macro data when it was loaded.
# Jupyter only renders the value of a cell's last top-level expression; a bare
# `macro.head()` nested inside an `if` body is evaluated and then discarded.
# Writing the preview as a conditional expression keeps it as the cell's last
# expression, so the table is shown (and nothing is shown when it is None).
macro.head() if with_macro else None

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 238 entries, 1998-03-31 to 2017-12-31
Columns: 110 entries, EMP to PPI_MOTOR
dtypes: float64(110)
memory usage: 206.4 KB


In [4]:
"""Split data"""
# Partition the data and pull out the three splits by name.
processed_data = split_data(firm, macro)
train_data, valid_data, test_data = (
    processed_data[split_name] for split_name in ("train", "valid", "test")
)

# Network inputs per split: macro first, then firm, when macro data is in use;
# the firm data alone otherwise.
input_keys = ("macro", "firm") if with_macro else ("firm",)
train_data_list = [train_data[key] for key in input_keys]
valid_data_list = [valid_data[key] for key in input_keys]
test_data_list = [test_data[key] for key in input_keys]

In [5]:
"""Create networks."""
# Build one set of networks per split from the same config file.
# The generator is consumed left to right, so the networks are created in the
# order train, valid, test.
train_networks, valid_networks, test_networks = (
    create_gan(configpath=configpath, data=split_data_dict)
    for split_data_dict in (train_data, valid_data, test_data)
)

2022-03-23 21:47:17.627827: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-03-23 21:47:17.628419: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-03-23 21:47:17.658844: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-23 21:47:17.659107: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.7335GHz coreCount: 20 deviceMemorySize: 7.90GiB deviceMemoryBandwidth: 298.32GiB/s
2022-03-23 21:47:17.659125: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2022-03-23 21:47:17.660109: I tensorflow/stream_executor/platform/d

Model: "discriminant_network"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
macro_input (InputLayer)        [(None, 110, 1)]     0                                            
__________________________________________________________________________________________________
macro_dropout (Dropout)         (None, 110, 1)       0           macro_input[0][0]                
__________________________________________________________________________________________________
macro_lstm (LSTM)               (None, 4)            96          macro_dropout[0][0]              
__________________________________________________________________________________________________
expand_dim (Lambda)             (None, 1, 4)         0           macro_lstm[0][0]                 
_______________________________________________________________________________

Model: "discriminant_network"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
macro_input (InputLayer)        [(None, 110, 1)]     0                                            
__________________________________________________________________________________________________
macro_dropout (Dropout)         (None, 110, 1)       0           macro_input[0][0]                
__________________________________________________________________________________________________
macro_lstm (LSTM)               (None, 4)            96          macro_dropout[0][0]              
__________________________________________________________________________________________________
expand_dim (Lambda)             (None, 1, 4)         0           macro_lstm[0][0]                 
_______________________________________________________________________________

In [None]:
"""Train GAN models."""
# Gather the training inputs: the config path plus the data and networks of
# the train and validation splits (the test split is not passed to training).
gan_training_inputs = {
    "configpath": configpath,
    "train_data": train_data,
    "train_networks": train_networks,
    "valid_data": valid_data,
    "valid_networks": valid_networks,
}
train_gan(**gan_training_inputs)

In [None]:
"""Compute final pricing loss and Sharpe loss for all data"""
# Train split: evaluate the discriminant network and log its Sharpe loss.
train_discriminant = train_networks["discriminant_network"]
sdf_train = train_discriminant(train_data_list)
sharpe_loss_train = sharpe_loss(sdf_train)
logging.info(f"GAN Trained train SHARPE loss: {sharpe_loss_train}")

# Validation split.
# NOTE(review): no weights are copied into the validation network here, unlike
# the test network below — presumably train_gan keeps it in sync; confirm.
valid_discriminant = valid_networks["discriminant_network"]
sdf_valid = valid_discriminant(valid_data_list)
sharpe_loss_valid = sharpe_loss(sdf_valid)
logging.info(f"GAN Trained valid SHARPE loss: {sharpe_loss_valid}")

# Test split: the test network is not passed to train_gan in this notebook, so
# copy the train network's weights into it before evaluating.
test_discriminant = test_networks["discriminant_network"]
test_discriminant.set_weights(train_discriminant.get_weights())
sdf_test = test_discriminant(test_data_list)
sharpe_loss_test = sharpe_loss(sdf_test)
logging.info(f"GAN Trained test SHARPE loss: {sharpe_loss_test}")

In [None]:
# export sdf
# Stack the per-split SDF series (train, then valid, then test) into a single
# frame. ignore_index avoids carrying three overlapping 0..n ranges before the
# date index is attached.
sdf_train_df = pd.DataFrame(sdf_train, columns=["sdf"])
sdf_valid_df = pd.DataFrame(sdf_valid, columns=["sdf"])
sdf_test_df = pd.DataFrame(sdf_test, columns=["sdf"])
sdf_df = pd.concat(
    [sdf_train_df, sdf_valid_df, sdf_test_df], axis=0, ignore_index=True
)

# Index.unique() returns values in order of first appearance. `firm` is indexed
# by (ticker, date), so that order is only chronological if the first ticker
# happens to cover every date; sort explicitly so each SDF value gets its date.
# NOTE(review): assumes split_data orders train -> valid -> test chronologically
# with one SDF value per date — confirm.
sdf_dates = firm.index.get_level_values("date").unique().sort_values()
if len(sdf_dates) != len(sdf_df):
    raise ValueError(
        f"Expected one SDF value per date: got {len(sdf_df)} values "
        f"for {len(sdf_dates)} dates"
    )
sdf_df.index = sdf_dates

# Create the results directory if needed so the export does not fail on a
# fresh checkout.
sdf_path = Path(
    f"{projectpath}/data/results/sdf_uk_{select}_macro_{with_macro}.csv"
)
sdf_path.parent.mkdir(parents=True, exist_ok=True)
sdf_df.to_csv(sdf_path)

sdf_df.head()