### Loading data

In [49]:
import pandas as pd
import numpy as np

from utils.data_loader import load_all

In [50]:
# Load the combined dataset and drop every row that has no value in the 'AP' column.
df = load_all().dropna(subset=['AP'])
df.head()

Unnamed: 0,AP,ARR,ARW,G,OP,ORR,ORW,a5.c,wig2,^aex,...,SEK,CHF,THB,TTD,TND,AED,GBP,USD,UYU,VEB
2000-01-03,415.9,549.11,354.45,401.26,275.08,520.13,230.72,1204.88,1852.9,675.44,...,0.085771,0.456726,,0.115867,,0.197875,,0.726696,,
2000-01-04,404.41,533.89,357.14,401.42,275.08,520.02,229.63,1194.41,1796.6,642.25,...,,0.465253,0.019568,0.115445,,0.197034,1.18701,0.723608,,0.001114
2000-01-05,400.04,527.38,351.19,401.59,275.08,519.22,229.22,1192.89,1777.0,632.31,...,0.08674,0.466615,0.019422,0.11551,,0.197039,1.18624,0.723627,,0.001114
2000-01-06,410.15,522.02,347.96,401.75,275.07,519.62,228.82,,1832.1,624.21,...,,0.46865,0.019427,0.115662,,0.19726,1.19474,0.724439,,0.001115
2000-01-07,429.16,533.16,351.87,401.93,275.07,520.8,230.09,1223.61,1933.2,644.86,...,,0.465233,0.01941,0.115876,,0.197989,1.19596,0.727113,,0.001118


In [51]:
# (rows, columns) of the frame after the rows without an 'AP' value were removed.
df.shape

(4801, 200)

In [52]:
# Columns selected into `funds_df` below; these are the first seven columns shown by df.head().
fund_colnames = ['AP', 'ARR', 'ARW', 'G', 'OP', 'ORR', 'ORW']

In [53]:
# Keep only the columns listed in `fund_colnames`; all other columns of `df` are left out.
funds_df = df[fund_colnames]

In [54]:
# Sanity check: same row count as `df`, one column per entry in `fund_colnames`.
funds_df.shape

(4801, 7)

### Feature and target selection

First, we will try to train a classifier that selects the best-performing model.

In [55]:
# Number of rows counted as one year.
# NOTE(review): `year_days` was not defined in any visible cell (hidden kernel state).
# 252 is the value implied by the recorded training progress bar:
# 4801 rows - (2 + 2 + 3) * year_days = 3037 steps  ->  year_days = 252. Confirm.
year_days = 252

test_set_len = 2 * year_days

# Chronological split with no overlap: the last `test_set_len` rows are held out
# for testing, everything before them is available for training/validation.
# (The previous `iloc[test_set_len:]` skipped only the FIRST two years, so the
# test set contained most of the training rows.)
train_df = funds_df.iloc[:-test_set_len]
test_df = funds_df.iloc[-test_set_len:]

### Model training

In [56]:
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's global random number generator so weight initialisation is repeatable.
torch.manual_seed(1)

<torch._C.Generator at 0x23b634c8390>

In [66]:
class BasicLSTM(nn.Module):
    """Single-layer LSTM followed by two linear layers and a sigmoid.

    The recurrent state lives on the instance as ``self.hidden`` and is
    overwritten by every ``forward`` call; callers start a new sequence with
    ``model.hidden = model.init_hidden()``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Build the layers.

        Args:
            input_dim: number of features per time step.
            hidden_dim: size of the LSTM state and of the first linear layer.
            output_dim: number of values produced per time step.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim)
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh all-zero (h_0, c_0) pair, each of shape (1, 1, hidden_dim)."""
        state_shape = (1, 1, self.hidden_dim)
        return torch.zeros(state_shape), torch.zeros(state_shape)

    def forward(self, tensor):
        """Run one sequence through the network and update ``self.hidden``.

        Args:
            tensor: input whose first dimension is the sequence length; it is
                reshaped to (len(tensor), 1, -1), i.e. treated as a batch of one.

        Returns:
            Tensor of shape (len(tensor), 1, output_dim) with values in (0, 1):
            one output per time step, not only the last one.
        """
        steps = tensor.view(len(tensor), 1, -1)
        lstm_out, self.hidden = self.lstm(steps, self.hidden)
        # fc1 and fc2 are applied back to back, with no activation in between.
        return torch.sigmoid(self.fc2(self.fc1(lstm_out)))

##### Training parameters

In [67]:
# Hyper-parameters read by the training setup and training loop cells below.
n_epochs = 100  # number of full passes made by the training loop
lr = 0.02137  # learning rate handed to the Adam optimizer
model_hidden_dim = 49  # hidden size passed to BasicLSTM
min_seq_len = 3*year_days  # index of the first training target, i.e. the shortest history fed to the model
val_set_len = 2*year_days  # number of trailing rows of train_df used as validation targets

##### Training setup

In [68]:
# One input feature and one predicted value per fund column, so the model size
# follows `fund_colnames` instead of a hard-coded 7.
n_funds = len(fund_colnames)
model = BasicLSTM(n_funds, model_hidden_dim, n_funds)
# Mean squared error between the prediction and the target row.
loss_function = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

##### Training loop

In [69]:
# NOTE(review): `tqdm` is used below but is not imported in any visible cell;
# it has to be imported (e.g. in the imports cell) for a fresh-kernel run.
# NOTE(review): BasicLSTM ends in a sigmoid, so predictions lie in (0, 1), while
# the fund values shown by df.head() are in the hundreds; the targets probably
# need to be scaled before this loss can become small.

# Convert the frame to a tensor once instead of rebuilding it on every step.
train_values = torch.Tensor(train_df.values)
# Rows [min_seq_len, val_start) are training targets, rows [val_start, end) validation targets.
val_start = len(train_df) - val_set_len


def _predict_next(net, history):
    """Run `net` on `history` from a fresh hidden state and return its last-step output.

    The model emits one output per time step with shape (T, 1, n_outputs); only
    the final step is a prediction for the row that follows `history`, so it is
    the only one compared with the target (shape (n_outputs,)).
    """
    net.hidden = net.init_hidden()
    return net(history)[-1].view(-1)


for epoch in range(n_epochs):
    # training: predict row `target_idx` from all rows before it
    for target_idx in tqdm(range(min_seq_len, val_start)):
        model.zero_grad()
        pred = _predict_next(model, train_values[:target_idx])
        loss = loss_function(pred, train_values[target_idx])
        loss.backward()
        optimizer.step()
    # validation: same task on the trailing `val_set_len` rows, without gradients
    with torch.no_grad():
        # one loss per validation target (previously sized from an undefined `y_val`)
        losses = torch.zeros(val_set_len)
        # wrap the range (not the enumerate) so tqdm knows the total
        for i, target_idx in enumerate(tqdm(range(val_start, len(train_df)))):
            # the hidden state is reset per sequence here too, so nothing leaks
            # from one validation sample into the next
            pred = _predict_next(model, train_values[:target_idx])
            losses[i] = loss_function(pred, train_values[target_idx])
    pd.Series(losses.numpy()).plot(title=f"Epoch {epoch} validation loss")
    plt.show()

  1%|▊                                                                               | 33/3037 [00:18<26:00,  1.92it/s]

KeyboardInterrupt: 