In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from typing import Union
from tqdm import tqdm

In [2]:
data = yf.download(
    tickers= "AAPL",
    start="2009-12-30",
    end="2016-12-30"
)

[*********************100%***********************]  1 of 1 completed


In [3]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-30 00:00:00-05:00,7.458214,7.571429,7.439643,7.558571,6.415688,412084400
2009-12-31 00:00:00-05:00,7.611786,7.619643,7.520000,7.526071,6.388103,352410800
2010-01-04 00:00:00-05:00,7.622500,7.660714,7.585000,7.643214,6.487534,493729600
2010-01-05 00:00:00-05:00,7.664286,7.699643,7.616071,7.656429,6.498750,601904800
2010-01-06 00:00:00-05:00,7.656429,7.686786,7.526786,7.534643,6.395380,552160000
...,...,...,...,...,...,...
2016-12-22 00:00:00-05:00,29.087500,29.127501,28.910000,29.072500,27.091923,104343600
2016-12-23 00:00:00-05:00,28.897499,29.129999,28.897499,29.129999,27.145504,56998000
2016-12-27 00:00:00-05:00,29.129999,29.450001,29.122499,29.315001,27.317904,73187600
2016-12-28 00:00:00-05:00,29.379999,29.504999,29.049999,29.190001,27.201420,83623600


In [4]:
data["Close"].plot(
    title= "AAPL close prices"
)

: 

: 

In [3]:
len(data)

1763

In [4]:
data.iloc[2]

Open         7.622500e+00
High         7.660714e+00
Low          7.585000e+00
Close        7.643214e+00
Adj Close    6.487533e+00
Volume       4.937296e+08
Name: 2010-01-04 00:00:00-05:00, dtype: float64

In [5]:
class StockDataset(Dataset):
    def __init__(self,
                 data : Union[pd.Series, pd.DataFrame],
                 lookback : int) -> None:
        super().__init__()
        self.data = data
        self.lookback = lookback

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index : int):
        if index < self.lookback:
            X =self.data\
                .iloc[:index]\
                .dropna()\
                .values
            
            X = torch.tensor(X)
            X = torch.cat([X, torch.zeros(self.lookback - X.size(0) )])

            y = self.data.iloc[index]
            if np.isnan(y):
                y = 0
            return X, y

        X = self.data\
            .iloc[index - self.lookback : index]\
            .values

        X = torch.tensor(X)

        y = self.data.iloc[index]    
        return X, y
              

In [58]:
def load_data(stock, lookback):
    data_raw = stock.values # convert to numpy array
    data = []
    
    # create all possible sequences of length look_back
    for index in range(len(data_raw) - lookback): 
        data.append(data_raw[index: index + lookback])
    
    data = np.array(data)
    test_set_size = int(np.round(0.2*data.shape[0]))
    train_set_size = data.shape[0] - (test_set_size)
    
    x_train = data[:train_set_size,:-1,:]
    y_train = data[:train_set_size,-1,:]
    
    x_test = data[train_set_size:,:-1]
    y_test = data[train_set_size:,-1,:]

    x_train = torch.from_numpy(x_train).type(torch.Tensor)
    x_test = torch.from_numpy(x_test).type(torch.Tensor)

    y_train = torch.from_numpy(y_train).type(torch.Tensor)
    y_test = torch.from_numpy(y_test).type(torch.Tensor)

    
    return [x_train, y_train, x_test, y_test]

In [59]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-30 00:00:00-05:00,7.458214,7.571429,7.439643,7.558571,6.415688,412084400
2009-12-31 00:00:00-05:00,7.611786,7.619643,7.520000,7.526071,6.388103,352410800
2010-01-04 00:00:00-05:00,7.622500,7.660714,7.585000,7.643214,6.487533,493729600
2010-01-05 00:00:00-05:00,7.664286,7.699643,7.616071,7.656429,6.498749,601904800
2010-01-06 00:00:00-05:00,7.656429,7.686786,7.526786,7.534643,6.395379,552160000
...,...,...,...,...,...,...
2016-12-22 00:00:00-05:00,29.087500,29.127501,28.910000,29.072500,27.091921,104343600
2016-12-23 00:00:00-05:00,28.897499,29.129999,28.897499,29.129999,27.145510,56998000
2016-12-27 00:00:00-05:00,29.129999,29.450001,29.122499,29.315001,27.317902,73187600
2016-12-28 00:00:00-05:00,29.379999,29.504999,29.049999,29.190001,27.201416,83623600


In [60]:
x_train, y_train, x_test, y_test = load_data(data[["Close"]].pct_change(10).dropna(), lookback = 10)

In [61]:
x_train

tensor([[[-0.0104],
         [-0.0228],
         [ 0.0048],
         ...,
         [-0.0420],
         [-0.0198],
         [ 0.0008]],

        [[-0.0228],
         [ 0.0048],
         [-0.0124],
         ...,
         [-0.0198],
         [ 0.0008],
         [-0.0539]],

        [[ 0.0048],
         [-0.0124],
         [-0.0137],
         ...,
         [ 0.0008],
         [-0.0539],
         [-0.0829]],

        ...,

        [[-0.0230],
         [-0.0178],
         [-0.0302],
         ...,
         [-0.1232],
         [-0.0784],
         [-0.0801]],

        [[-0.0178],
         [-0.0302],
         [-0.0478],
         ...,
         [-0.0784],
         [-0.0801],
         [-0.0721]],

        [[-0.0302],
         [-0.0478],
         [-0.0642],
         ...,
         [-0.0801],
         [-0.0721],
         [-0.0248]]])

## Building LSTM model


In [62]:
lookback = 10
input_dim = 1
hidden_dim = 32
num_layers = 2
output_dim = 1
num_epochs = 20


In [63]:
aapl_dataset = StockDataset(
    data["Close"].pct_change(),
    lookback=lookback
)

aapl_dataloader = DataLoader(
    aapl_dataset,
    batch_size= 1,
    shuffle= False
)

In [64]:
class GRU(nn.Module):
    def __init__(self, 
                 input_dim, 
                 hidden_dim, 
                 num_layers, 
                 output_dim
                ):
        
        super(GRU, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.gru(x, (h0.detach()))
        out = self.fc(out[:, -1, :]) 
        return out

In [65]:
gru = GRU(
    input_dim,
    hidden_dim,
    num_layers,
    output_dim
)

In [66]:
gru(x_train)

tensor([[-0.1163],
        [-0.1162],
        [-0.1161],
        ...,
        [-0.1151],
        [-0.1149],
        [-0.1150]], grad_fn=<AddmmBackward0>)

In [68]:
treinable_params = sum(p.numel() for p in gru.parameters() if p.requires_grad)

In [69]:
treinable_params

9729

In [70]:
loss_fn = torch.nn.BCELoss(reduction='mean')
optimiser = torch.optim.Adam(gru.parameters(), lr=0.01)

In [76]:
y_train.size()

torch.Size([349, 9, 1])

In [73]:
hist = np.zeros(num_epochs)

for t in range(num_epochs):
    y_train_pred = gru(x_train)

    loss = loss_fn(y_train_pred, y_train)
    print("Epoch ", t, "MSE: ", loss.item())
    hist[t] = loss.item()
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()

ValueError: Using a target size (torch.Size([349, 9, 1])) that is different to the input size (torch.Size([1394, 1])) is deprecated. Please ensure they have the same size.

In [91]:
gru(X_batch)

RuntimeError: shape '[1, 10, 32]' is invalid for input of size 10

In [14]:
import time
history = np.zeros(num_epochs)
errors = []

for epoch in range(num_epochs):
    batch_error = torch.tensor([])
    for X_batch, y_batch in aapl_dataloader:
        print(X_batch)
        y_train_pred = gru(X_batch)
        loss = loss_fn(y_train_pred, y_batch)

        batch_error = torch.cat([batch_error, torch.tensor([loss.item()])])

        history[epoch] = loss.item()

        optimiser.zero_grad()
        loss.backward()
        
        optimiser.step()

    errors = torch.cat([errors, torch.tensor([batch_error.mean()])])

    print(f"Epoch: {epoch}, mean error: {errors[-1]}")


tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=torch.float64)
torch.Size([2, 1, 32]) torch.Size([1, 10])


RuntimeError: For unbatched 2-D input, hx should also be 2-D but got 3-D tensor