In [1]:
%load_ext autoreload
%autoreload 2


In [2]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from typing import Union
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler

import os 

while "notebooks" in os.getcwd():
    os.chdir("..")

from src.preprocessing.lstm.make_dataset import StockReturnsDataset

In [23]:
prices_df = yf.download(
    tickers= "SPY",
    start="2020-12-30",
    end="2022-12-30"
)

[*********************100%***********************]  1 of 1 completed


In [24]:
prices = prices_df[["Close"]]

In [9]:
scaler = MinMaxScaler(feature_range=(-1, 1))

In [30]:
scaled_prices = scaler\
    .fit_transform(prices)\
    .reshape(-1,1)

In [38]:
scaled_prices_df = pd.DataFrame(
    scaled_prices,
    index = prices.index, 
)\
    .rename(columns = {
        0 scaled_prices_df: "price"
    })

In [43]:
lookback = 10
for i in range(1, lookback + 1):
    scaled_prices_df[f"d-{i}"] = scaled_prices_df\
        .price\
        .shift(i)
        
scaled_prices_df.dropna(inplace = True)

In [48]:
y = scaled_prices_df["price"]\
    .to_numpy()\
    .reshape(-1,1)

In [49]:
y = torch\
    .from_numpy(y)\
    .type(torch.Tensor)

In [None]:
scaled

In [58]:
X = scaled_prices_df.iloc[:, 1:]\
    .to_numpy()\
    .reshape((
        scaled_prices_df.shape[0], 
        scaled_prices_df.shape[1] - 1 , 
        1
    ))



In [60]:
X = torch\
    .from_numpy(X)\
    .type(torch.Tensor)

In [51]:
y.shape

torch.Size([494, 1])

In [145]:
returns_dataloader = DataLoader(
    returns_dataset,
    batch_size= 1,
    shuffle=True
)

## Building LSTM model


In [146]:
lookback = 100
input_dim = 1
hidden_dim = 5
num_layers = 2
output_dim = 1
num_epochs = 20

In [147]:
class LSTM(nn.Module):
    def __init__(self, 
                 input_dim, 
                 hidden_dim, 
                 num_layers, 
                 output_dim
                ):
        
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        # print(h0.shape, x.shape)
        out, (h, c) = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :]) 
        return out

In [148]:
model = LSTM(
    input_dim,
    hidden_dim,
    num_layers,
    output_dim
)

In [149]:
treinable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

In [150]:
treinable_params

406

In [151]:
loss_fn = torch.nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.001)

In [152]:
X

tensor([[[ 0.0079],
         [-0.0288],
         [ 0.0282],
         [-0.0054],
         [-0.0069],
         [ 0.0733],
         [ 0.0378],
         [-0.0102],
         [ 0.0777],
         [-0.0100]]])

In [153]:
hist = np.zeros(num_epochs)

for t in (range(num_epochs)):
    for X, y in tqdm(returns_dataloader):
        y_train_pred = model(X)

        loss = loss_fn(y_train_pred, y)
        
        hist[t] = loss.item()
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()

    with torch.no_grad():
        y_pred = model(returns_dataset[:][0])
        y = returns_dataset[:][1]
        epoch_accuracy = (y_pred.sign() == y.sign()).sum()/y.size(0)

        print(f"Epoch {t+1} accuracy : {epoch_accuracy}")

  0%|          | 0/3922 [00:00<?, ?it/s]

100%|██████████| 3922/3922 [00:10<00:00, 356.94it/s]


Epoch 1 accuracy : 0.4752677083015442


100%|██████████| 3922/3922 [00:14<00:00, 269.01it/s]


Epoch 2 accuracy : 0.4752677083015442


100%|██████████| 3922/3922 [00:13<00:00, 284.32it/s]


Epoch 3 accuracy : 0.4752677083015442


100%|██████████| 3922/3922 [00:14<00:00, 269.89it/s]


Epoch 4 accuracy : 0.5201427936553955


100%|██████████| 3922/3922 [00:17<00:00, 226.48it/s]


Epoch 5 accuracy : 0.5201427936553955


100%|██████████| 3922/3922 [00:14<00:00, 279.23it/s]


Epoch 6 accuracy : 0.505609393119812


100%|██████████| 3922/3922 [00:14<00:00, 274.02it/s]


Epoch 7 accuracy : 0.5201427936553955


100%|██████████| 3922/3922 [00:13<00:00, 288.69it/s]


Epoch 8 accuracy : 0.4752677083015442


 32%|███▏      | 1257/3922 [00:04<00:10, 263.92it/s]


KeyboardInterrupt: 

In [113]:
hist

array([6.20609440e-04, 1.21760252e-03, 3.61778715e-04, 1.43581280e-03,
       4.49815823e-04, 2.63162469e-03, 2.29830021e-05, 1.37747370e-03,
       1.28702712e-04, 1.52274035e-04, 1.19904277e-03, 1.67016638e-04,
       1.53039291e-05, 4.10288747e-04, 4.14957380e-04, 4.32084635e-05,
       2.35700369e-04, 1.12152286e-03, 3.36647063e-04, 2.39328248e-04])

In [155]:
y_pred = model(returns_dataset[:][0])

In [156]:
y = returns_dataset[:][1]

In [157]:
(y_pred.sign() == y.sign()).sum()/y.size(0)

tensor(0.5201)

In [167]:
(y_pred > e-2).sum()

tensor(0)

In [51]:
aapl_validation = yf.download(
    tickers= "AAPL",
    start="2016-12-30",
    end="2018-12-30"
)

[*********************100%***********************]  1 of 1 completed


In [54]:
returns_val = aapl_validation\
    .pct_change()\
    .dropna()