In [1]:
import torch
import torch.nn as nn
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import import_ipynb
from Utils import *

importing Jupyter notebook from Utils.ipynb


In [2]:
import torch

def save_checkpoint(model, optimizer, filepath):
    """Write the model and optimizer state to ``filepath`` with ``torch.save``.

    The saved object is a dict with two entries:
    ``'state_dict'`` (``model.state_dict()``) and
    ``'optimizer'`` (``optimizer.state_dict()``).

    Args:
        model: object exposing ``state_dict()``.
        optimizer: object exposing ``state_dict()``.
        filepath: destination handed to ``torch.save``.
    """
    checkpoint = dict()
    checkpoint['state_dict'] = model.state_dict()
    checkpoint['optimizer'] = optimizer.state_dict()
    torch.save(checkpoint, filepath)

def load_checkpoint(model, optimizer, filepath):
    """Restore ``model`` and ``optimizer`` in place from a saved checkpoint.

    The file is expected to hold a dict with the keys ``'state_dict'`` and
    ``'optimizer'`` (the layout written by ``save_checkpoint``).

    Args:
        model: object whose ``load_state_dict`` receives ``'state_dict'``.
        optimizer: object whose ``load_state_dict`` receives ``'optimizer'``.
        filepath: checkpoint location handed to ``torch.load``.

    Returns:
        The same ``(model, optimizer)`` pair that was passed in.
    """
    # The identity map_location lets a checkpoint that was saved on GPU
    # be loaded on a CPU-only machine.
    checkpoint = torch.load(filepath, map_location=lambda storage, loc: storage)
    for target, key in ((model, 'state_dict'), (optimizer, 'optimizer')):
        target.load_state_dict(checkpoint[key])
    return model, optimizer


In [23]:
# Date bounds used to build the training dataset.
TRAIN_START_PERIOD = '1990-01-01'
TRAIN_END_PERIOD = '2000-01-01'

# Date bounds used to build the test dataset.
# NOTE(review): the test range starts on the same date the training range ends;
# whether that day can land in both sets depends on how create_data_df treats
# its bounds - confirm.
TEST_START_PERIOD = '2000-01-01'
TEST_END_PERIOD = '2001-01-01'

In [24]:
from torch.utils.data import Dataset, DataLoader

class FinanceDataset(Dataset):
    """Rows of a finance table paired with a future value of one column.

    Sample ``i`` is ``(features, label)``: ``features`` is one row of the table
    and ``label`` is the value of ``asset_to_forecast`` taken
    ``time_to_forecast`` rows later.

    Rows whose label would fall outside the table are excluded. Shifting leaves
    NaN labels on those rows, and a NaN label turns the loss (and then the
    weights) into NaN.

    Attributes:
        finance_data_df: full table returned by ``create_data_df``
            (presumably a pandas DataFrame - it is used via ``.shift`` / ``.values``).
        labels_df: full shifted label column, including the NaN edge rows.
        data_tensor: feature rows that have a label.
        labels_tensor: labels aligned one-to-one with ``data_tensor``.
    """

    def __init__(self, start_date, end_date, asset_to_forecast, time_to_forecast):
        """Load the table for the date range and build aligned tensors.

        Args:
            start_date: forwarded to ``create_data_df``.
            end_date: forwarded to ``create_data_df``.
            asset_to_forecast: column label used as the prediction target.
            time_to_forecast: number of rows to look ahead for the label.
        """
        self.finance_data_df = create_data_df(use='dataset', start_date=start_date, end_date=end_date)

        self.asset_to_forecast = asset_to_forecast
        self.time_to_forecast = time_to_forecast
        self.prepare_labels()

    def prepare_labels(self):
        """(Re)build ``labels_df``, ``data_tensor`` and ``labels_tensor``.

        Everything is derived from ``finance_data_df``, so calling this more
        than once gives the same result.
        """
        horizon = self.time_to_forecast
        n_rows = self.finance_data_df.shape[0]

        self.labels_df = self.finance_data_df.shift(-horizon)[self.asset_to_forecast]

        # shift(-h) leaves NaN on the last h rows (h > 0) or on the first |h|
        # rows (h < 0); keep only the rows in between. Clamping keeps the
        # window empty rather than inverted when |h| exceeds the table length.
        first = min(max(-horizon, 0), n_rows)
        last = max(n_rows - max(horizon, 0), first)

        self.data_tensor = torch.from_numpy(self.finance_data_df.values)[first:last]
        self.labels_tensor = torch.from_numpy(self.labels_df.values)[first:last]

    def __len__(self):
        """Number of samples that have a label."""
        return self.data_tensor.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair at position ``idx``."""
        return self.data_tensor[idx], self.labels_tensor[idx]

In [25]:
class RNN(nn.Module):
    """Multi-layer ``nn.RNN`` followed by a linear layer applied at every time step."""

    def __init__(self, data_dim, output_size, hidden_dim, n_layers):
        """Build the recurrent stack and the output projection.

        Args:
            data_dim: number of input features per time step.
            output_size: number of values predicted per time step.
            hidden_dim: size of the recurrent hidden state.
            n_layers: number of stacked recurrent layers.
        """
        super(RNN, self).__init__()

        self.hidden_dim = hidden_dim

        # batch_first=True: inputs and outputs are (batch_size, seq_length, features)
        self.rnn = nn.RNN(data_dim, hidden_dim, n_layers, batch_first=True)

        # last, fully-connected layer
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x, hidden):
        """Run the sequence through the RNN and project every time step.

        Args:
            x: input of shape (batch_size, seq_length, data_dim).
            hidden: initial hidden state of shape (n_layers, batch_size, hidden_dim),
                or ``None`` to start from zeros.

        Returns:
            ``(output, hidden)``. ``output`` has shape
            (batch_size * seq_length, output_size) with rows ordered batch-major.
            ``hidden`` is the final hidden state.
        """
        # r_out: (batch_size, seq_length, hidden_dim)
        r_out, hidden = self.rnn(x, hidden)

        # Flatten to (batch_size * seq_length, hidden_dim). reshape is used
        # instead of view because the batch_first output may be non-contiguous
        # when batch_size > 1, in which case view raises.
        r_out = r_out.reshape(-1, self.hidden_dim)

        output = self.fc(r_out)

        return output, hidden

In [26]:
# Train and test datasets share the same target: the '3 Mo' column one row ahead.
train_finance_dataset = FinanceDataset(
    start_date=TRAIN_START_PERIOD,
    end_date=TRAIN_END_PERIOD,
    asset_to_forecast='3 Mo',
    time_to_forecast=1,
)
test_finance_dataset = FinanceDataset(
    start_date=TEST_START_PERIOD,
    end_date=TEST_END_PERIOD,
    asset_to_forecast='3 Mo',
    time_to_forecast=1,
)

In [27]:
# shuffle=False keeps rows in dataset order; the training loop feeds each
# batch to the RNN as one sequence, so the order must be preserved.
train_data_loader = DataLoader(
    train_finance_dataset,
    batch_size=24,
    shuffle=False,
    num_workers=0,
)
test_data_loader = DataLoader(
    test_finance_dataset,
    batch_size=24,
    shuffle=False,
    num_workers=0,
)

In [28]:
# Pull one batch to inspect its shape (used below to size the model input).
# The built-in next() is used because DataLoader iterators no longer provide
# a .next() method in current PyTorch releases.
dataiter = iter(train_data_loader)
samples, labels = next(dataiter)

In [29]:
# Hyperparameters
input_size = samples.shape[1]  # number of features per row, read off the sample batch
output_size = 1
hidden_dim = 32
n_layers = 2

# Model, loss and optimizer
rnn = RNN(
    data_dim=input_size,
    output_size=output_size,
    hidden_dim=hidden_dim,
    n_layers=n_layers,
)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=1e-3)

In [30]:
PATH = 'rnn.pkl'

def train(rnn, optimizer, epochs=1):
    """Train ``rnn`` on ``train_data_loader`` and checkpoint the best batch.

    Each loader batch is fed to the network as one sequence (a leading batch
    dimension of size 1 is added), and the hidden state is carried from one
    batch to the next. After every batch the loss is printed. Whenever a batch
    reaches a new lowest loss, the model and optimizer are saved to ``PATH``.

    Uses the module-level ``train_data_loader``, ``criterion``, ``PATH`` and
    ``save_checkpoint``.

    Args:
        rnn: model called as ``rnn(inputs, hidden) -> (prediction, hidden)``.
        optimizer: optimizer over ``rnn``'s parameters.
        epochs: number of passes over ``train_data_loader``.
    """
    rnn.train()

    hidden = None
    # Start at +inf so the first finite loss always produces a checkpoint,
    # whatever the scale of the targets.
    min_loss = float('inf')

    for epoch in range(epochs):
        for inputs, labels in train_data_loader:
            # (rows, features) -> (1, rows, features): one sequence per loader batch
            inputs = inputs.unsqueeze(0).float()
            # (rows,) -> (rows, 1) to match the per-step prediction shape
            labels = labels.float().unsqueeze(1)

            prediction, hidden = rnn(inputs, hidden)
            # Detach so back-propagation stops at the batch boundary
            hidden = hidden.detach()

            loss = criterion(prediction, labels)
            loss_value = loss.item()

            # A NaN/inf loss (e.g. from a NaN label) would turn every weight
            # into NaN after the update, so such batches are reported but skipped.
            if torch.isfinite(loss):
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # NOTE(review): the checkpoint holds the weights *after* this
                # step, while loss_value was measured before it.
                if loss_value < min_loss:
                    min_loss = loss_value
                    save_checkpoint(rnn, optimizer, PATH)

            # display loss
            print('Epoch: {}, Loss: {} '.format(epoch, loss_value))


In [31]:
def test(trained_rnn, optimizer, epochs=1):
    """Evaluate ``trained_rnn`` on ``test_data_loader`` and print each batch loss.

    The model is switched to eval mode and run without gradient tracking. Each
    loader batch is fed as one sequence, and the hidden state is carried across
    batches. Uses the module-level ``test_data_loader`` and ``criterion``.

    Args:
        trained_rnn: model called as ``trained_rnn(inputs, hidden)``.
        optimizer: not used by this function; kept in the signature.
        epochs: number of passes over ``test_data_loader``.
    """
    trained_rnn.eval()
    state = None

    with torch.no_grad():
        for epoch in range(epochs):
            for features, targets in test_data_loader:
                # (rows, features) -> (1, rows, features)
                features = features.float().unsqueeze(0)
                # (rows,) -> (rows, 1)
                targets = targets.float().unsqueeze(1)

                prediction, state = trained_rnn(features, state)
                batch_loss = criterion(prediction, targets).item()
                print('Epoch: {}, Loss: {} '.format(epoch, batch_loss))
        

In [32]:
epochs = 1

# Pass the configured epoch count through; previously it was set but ignored.
train(rnn, optimizer, epochs=epochs)
# Reload the best checkpoint written during training (loaded into the same rnn object).
trained_rnn, optimizer = load_checkpoint(rnn, optimizer, PATH)
test(trained_rnn, optimizer)




Epoch: 0, Loss: 57.968231201171875 
Epoch: 0, Loss: 57.85779571533203 
Epoch: 0, Loss: 56.75642013549805 
Epoch: 0, Loss: 53.58875274658203 
Epoch: 0, Loss: 51.107975006103516 
Epoch: 0, Loss: 48.3720703125 
Epoch: 0, Loss: 43.92620849609375 
Epoch: 0, Loss: 40.51518630981445 
Epoch: 0, Loss: 37.05945587158203 
Epoch: 0, Loss: 33.82000732421875 
Epoch: 0, Loss: 25.600379943847656 
Epoch: 0, Loss: 21.195905685424805 
Epoch: 0, Loss: 19.47658348083496 
Epoch: 0, Loss: 16.18085479736328 
Epoch: 0, Loss: 14.630372047424316 
Epoch: 0, Loss: 14.614950180053711 
Epoch: 0, Loss: 13.1470365524292 
Epoch: 0, Loss: 11.256665229797363 
Epoch: 0, Loss: 9.32597827911377 
Epoch: 0, Loss: 6.306405067443848 
Epoch: 0, Loss: 3.430784225463867 
Epoch: 0, Loss: 2.5012426376342773 
Epoch: 0, Loss: 2.8264000415802 
Epoch: 0, Loss: 2.3287596702575684 
Epoch: 0, Loss: 1.5174857378005981 
Epoch: 0, Loss: 1.4354310035705566 
Epoch: 0, Loss: 0.48387405276298523 
Epoch: 0, Loss: 0.3117234706878662 
Epoch: 0, Loss