In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from datetime import datetime, timedelta

# Data Preprocessing

In [2]:
# Load the raw CSV inputs.
# The data directory is defined once so it can be repointed in a single place
# (it is still a machine-specific absolute path -- adjust DATA_DIR when running elsewhere).
DATA_DIR = '/Users/mac/Desktop/PycharmProjects/TAADL/DATA'

# Both files are indexed by their 'Date' column.
macro_data = pd.read_csv(f'{DATA_DIR}/macroeconomic.csv', index_col='Date')
price_data = pd.read_csv(f'{DATA_DIR}/price_volume.csv', index_col='Date')

# Convert the indices to datetimes so the two tables can be aligned and sliced by date.
macro_data.index = pd.to_datetime(macro_data.index)
price_data.index = pd.to_datetime(price_data.index)

In [3]:
# Keep only the dates that appear in BOTH the macro table and the price table.
# np.intersect1d returns the common values sorted and without duplicates.
common_dates = np.intersect1d(macro_data.index, price_data.index)
index = pd.to_datetime(common_dates)

In [4]:
# Build the feature table: price columns followed by macro columns, aligned on the
# shared dates, then drop every date that has a missing value in any column.
feature = pd.concat(
    [price_data.loc[index, :], macro_data.loc[index, :]],
    axis=1,
).dropna(axis=0)

In [5]:
# Preview the first rows of the combined feature table as a sanity check.
feature.head()

Unnamed: 0,BNDX_ret030,BNDX_ret060,BNDX_ret130,BNDX_ret260,BND_ret030,BND_ret060,BND_ret130,BND_ret260,VGK_ret030,VGK_ret060,...,Oil,SP500,US_Pay,US_GDP,US_CPI,10Y-2YSpread,2Y-3MSpread,Cop/Gold,SP500/DJIA,SP500/Rus2000
2007-01-18,0.001124,0.003436,0.006991,0.014936,0.000652,0.001243,0.002075,0.00683,0.003104,0.052411,...,50.48,1426.37,137249.0,0.779358,2.54065,-0.14,-0.213,0.00395,0.113493,1.809816
2007-01-19,0.001094,0.003822,0.00721,0.014777,0.00079,0.001187,0.001771,0.006768,0.006454,0.057328,...,51.99,1430.5,137249.0,0.779358,2.54065,-0.149,-0.179,0.003937,0.113843,1.838382
2007-01-22,0.001374,0.0036,0.007369,0.014703,0.00093,0.001348,0.001741,0.00702,-0.006114,0.052737,...,51.13,1422.95,137249.0,0.779358,2.54065,-0.154,-0.195,0.003978,0.114044,1.812767
2007-01-23,0.00158,0.003839,0.007256,0.014417,0.000907,0.001375,0.001701,0.007063,0.006741,0.061168,...,55.04,1427.99,137249.0,0.779358,2.54065,-0.144,-0.16,0.003979,0.113931,1.83643
2007-01-24,0.002012,0.003991,0.006842,0.014332,0.000637,0.001256,0.001589,0.006941,0.008007,0.060499,...,55.37,1440.13,137249.0,0.779358,2.54065,-0.123,-0.182,0.003996,0.114099,1.830852


# Network Training Scheme

In [6]:
# Chronological train/test split.
# Rows dated up to and including `tr_date` form the training set; rows dated from
# the following calendar day onwards form the test set.
ii_date = feature.index[0]  # first date present in the feature table
tr_date = pd.Timestamp('2014-12-31')
te_date = tr_date + timedelta(days=1)

df_train, df_test = feature.loc[:tr_date], feature.loc[te_date:]

In [7]:
# Build the training labels.
# The label of training row i is the row of `feature` located TARGET_HORIZON rows
# later, restricted to the *_ret130 columns, and re-indexed onto the training dates.
# NOTE(review): this means the labels of the last TARGET_HORIZON training rows are read
# from rows past the end of df_train -- confirm that overlap is intended.
TARGET_HORIZON = 130  # row offset between a feature row and its label row

target_list = ['BNDX_ret130','BND_ret130','VGK_ret130','VNQI_ret130','VNQ_ret130','VTI_ret130','VWOB_ret130','VWO_ret130']

# Derive the slice length from df_train instead of hard-coding its row count,
# so the cell keeps working when the data or the split date changes.
n_train = len(df_train)
target = feature.iloc[TARGET_HORIZON:TARGET_HORIZON + n_train][target_list]
if len(target) != n_train:
    raise ValueError(
        f'Need {TARGET_HORIZON + n_train} feature rows to label {n_train} training rows, '
        f'but only {len(feature)} are available.'
    )

target = target.add_prefix('targ_')
target.index = df_train.index

In [11]:
from network import MLP

# ---- hyper-parameters -------------------------------------------------------
SEED = 0              # seed for torch's RNG so a fresh run reproduces the same numbers
UPDATE_FREQ = 30      # number of rows by which the training window grows at each step
BATCH_SIZE  = 50      # NOTE(review): unused in this cell -- every epoch is one full-batch step
LEARNING_RATE = 1e-3
INIT_WINDOW = 500     # number of rows in the first training window
INIT_EPOCHS = 500     # epochs run on the first window
UPDATE_EPOCHS = 50    # epochs run on every later window
LOG_EVERY = 25        # print the loss every LOG_EVERY epochs (and on the last epoch)

# Seed before the model is built so weight initialisation is repeatable.
# NOTE(review): assumes MLP draws its randomness from torch -- confirm in network.py.
torch.manual_seed(SEED)

# call the network for training
model = MLP()
model.weight_init()

# set up the loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE, weight_decay=1e-3)

# Expanding-window schedule: each entry is the number of leading training rows to use.
# The range stops before df_train.shape[0], so the final window may leave out up to
# UPDATE_FREQ - 1 trailing rows.
idx_list = list(range(INIT_WINDOW, df_train.shape[0], UPDATE_FREQ)) # window lengths list
num_windows = len(idx_list)

# The same model and optimizer are reused across windows, so each window
# continues training from the weights reached on the previous one.
for pointer in range(num_windows):
    if pointer == 0: # the first window gets a longer fit
        print('Initialize the training...')
        epochs = INIT_EPOCHS
    else:
        epochs = UPDATE_EPOCHS

    tr_idx = idx_list[pointer]
    # take out training features and labels for the current window
    X, y = df_train.iloc[:tr_idx], target.iloc[:tr_idx]

    # normalize input X with statistics of the current window only
    mu, std = X.mean(axis=0), X.std(axis=0)
    # a column that is constant within the window has std == 0; dividing by it would
    # fill the inputs with NaN/inf, so such columns are scaled by 1 instead
    std = std.replace(0, 1.0)
    X = X.sub(mu).div(std)

    # convert to float32 tensors
    X = torch.as_tensor(X.values, dtype=torch.float32)
    y = torch.as_tensor(y.values, dtype=torch.float32)

    for epoch in range(epochs):
        optimizer.zero_grad()

        # forward pass and loss on the whole window
        y_pred = model(X)
        loss = loss_fn(y_pred, y)

        # backpropagation
        loss.backward()

        # parameter update
        optimizer.step()

        # `epoch` runs from 0 to epochs - 1, so the last epoch is epochs - 1
        if (epoch % LOG_EVERY == 0) or (epoch == epochs - 1):
            print(f'Batch No. {pointer}, Epoch {epoch}: Loss = {loss}')
    

Initialize the training...
Batch No. 0, Epoch 0: Loss = 11.101202964782715
Batch No. 0, Epoch 25: Loss = 2.121514320373535
Batch No. 0, Epoch 50: Loss = 1.0813286304473877
Batch No. 0, Epoch 75: Loss = 0.6220840215682983
Batch No. 0, Epoch 100: Loss = 0.4302310347557068
Batch No. 0, Epoch 125: Loss = 0.363066703081131
Batch No. 0, Epoch 150: Loss = 0.2600279748439789
Batch No. 0, Epoch 175: Loss = 0.21947035193443298
Batch No. 0, Epoch 200: Loss = 0.18047352135181427
Batch No. 0, Epoch 225: Loss = 0.1621735543012619
Batch No. 0, Epoch 250: Loss = 0.14130637049674988
Batch No. 0, Epoch 275: Loss = 0.12959496676921844
Batch No. 0, Epoch 300: Loss = 0.11158451437950134
Batch No. 0, Epoch 325: Loss = 0.100038081407547
Batch No. 0, Epoch 350: Loss = 0.09595189243555069
Batch No. 0, Epoch 375: Loss = 0.08808155357837677
Batch No. 0, Epoch 400: Loss = 0.08327885717153549
Batch No. 0, Epoch 425: Loss = 0.07227706909179688
Batch No. 0, Epoch 450: Loss = 0.06907312572002411
Batch No. 0, Epoch 47

In [20]:
# Forward pass on X as left by the training loop (the last normalized training window).
# Gradient tracking is still enabled here, as the grad_fn in the displayed output shows.
model(X)

tensor([[ 0.0711, -0.0727,  0.0409,  ..., -0.0629,  0.0094,  0.1325],
        [ 0.0150, -0.0010,  0.0265,  ...,  0.0288,  0.0060,  0.1315],
        [-0.0663,  0.0298,  0.1447,  ...,  0.0923,  0.0358,  0.2894],
        ...,
        [-0.1254,  0.0381,  0.0391,  ...,  0.0706,  0.0636,  0.0369],
        [ 0.0250,  0.0815,  0.0964,  ..., -0.0728,  0.1063,  0.2501],
        [-0.0431,  0.0029,  0.2910,  ...,  0.0697,  0.1465,  0.5761]],
       grad_fn=<AddmmBackward0>)

In [32]:
# Put the network in training mode, then run the same forward pass again.
# NOTE(review): presumably this keeps stochastic layers (e.g. dropout) active so that
# repeated calls differ -- confirm against the MLP definition in network.py.
model.train()

model(X)

tensor([[ 3.2013e-02,  4.6438e-02,  1.2144e-01,  ...,  9.2975e-02,
         -7.5194e-03,  3.1306e-01],
        [-1.9703e-02, -2.4527e-02,  7.3693e-02,  ...,  5.2306e-02,
         -1.6744e-02,  1.6092e-01],
        [ 9.0374e-02, -5.0003e-03,  8.6113e-02,  ..., -3.7595e-02,
          6.6569e-02,  1.8225e-01],
        ...,
        [-1.5495e-02, -2.5992e-04,  1.5170e-01,  ...,  8.6246e-02,
          6.6618e-02,  2.7597e-01],
        [-3.0425e-02,  8.7637e-03,  7.6034e-02,  ...,  3.1262e-02,
          2.5262e-02,  4.2205e-02],
        [ 7.6047e-02,  8.5301e-02,  1.3959e-01,  ...,  8.6441e-02,
          2.0859e-01,  2.4312e-01]], grad_fn=<AddmmBackward0>)

In [None]:
# model uncertainty
# NOTE(review): presumably the number of repeated simulation passes used to estimate
# it -- the code that consumes this constant is not visible here.
SIMULATION_ROUND = 1000
