# Import Libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Create Dataset and DataLoader

In [2]:
# Dataset of windowed price returns for one stock ticker
class StockDataset(Dataset):
    """Sliding-window dataset of bar-to-bar price returns for one ticker.

    Price history is fetched with ``yf.Ticker(ticker).history`` and turned
    into simple returns ``(p[t+1] - p[t]) / p[t]`` for each price column.
    Sample ``i`` pairs the ``n_windows`` consecutive return rows starting at
    row ``i`` (the input) with the return row that immediately follows them
    (the target).

    Args:
        ticker: Symbol handed to ``yf.Ticker``.
        interval: Bar size forwarded to ``Ticker.history``.
        period: Look-back span forwarded to ``Ticker.history``.
        n_windows: Number of consecutive return rows in each input window.

    Attributes:
        data: Raw prices, shape ``(n_rows, n_features)``.
        rot: Returns, shape ``(n_rows - 1, n_features)``.
        X: Input windows, shape ``(n_samples, n_windows, n_features)``.
        y: Targets, shape ``(n_samples, n_features)``.

    Raises:
        ValueError: If ``n_windows`` is smaller than 1, if no history is
            returned, or if there are too few rows to build one sample.
    """

    # Price columns kept as features. Selecting them by name (instead of
    # dropping everything else) keeps the feature count fixed even when the
    # history frame carries extra columns or a differently named index.
    PRICE_COLUMNS = ("Open", "High", "Low", "Close")

    def __init__(self, ticker, interval="1d", period="max", n_windows=64):
        if n_windows < 1:
            raise ValueError(f"n_windows must be >= 1, got {n_windows}")

        self.interval = interval
        self.period = period
        self.n_windows = n_windows

        # Retrieve raw data, honouring the caller's interval and period
        self.ticker = yf.Ticker(ticker)
        history = self.ticker.history(interval=interval, period=period)
        if history.empty:
            raise ValueError(f"No price history returned for ticker {ticker!r}")
        prices = history[list(self.PRICE_COLUMNS)].to_numpy(dtype=float)
        self.data = prices

        # Simple return between consecutive rows: (p[t+1] - p[t]) / p[t]
        rot = np.diff(prices, axis=0) / prices[:-1]

        # Every window needs n_windows return rows plus one following target row
        n_samples = len(rot) - n_windows
        if n_samples < 1:
            raise ValueError(
                f"Need at least {n_windows + 2} price rows to build one sample, "
                f"got {len(prices)}"
            )

        # Row i of window_idx lists the return rows i .. i + n_windows - 1
        window_idx = np.arange(n_samples)[:, None] + np.arange(n_windows)
        self.X = rot[window_idx]
        # Copy so y does not share memory with rot
        self.y = rot[n_windows:].copy()
        assert len(self.X) == len(self.y), "Size of X and y is not equal"
        self.rot = rot

    def __len__(self):
        """Return the number of (window, target) samples."""
        return len(self.y)

    def __getitem__(self, index):
        """Return the ``(window, target)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

In [19]:
# Build the windowed MSFT dataset and wrap it in a shuffling mini-batch loader
training_data = StockDataset(ticker="MSFT", n_windows=128)
dataloader = DataLoader(dataset=training_data, shuffle=True, batch_size=64)

# Draw one batch and show the shapes of its inputs and targets, one per line
train_features, train_predition = next(iter(dataloader))
print(train_features.shape, train_predition.shape, sep="\n")

torch.Size([64, 128, 4])
torch.Size([64, 4])


# Model

In [33]:
# Cast the sampled feature batch to float32 before feeding it to nn layers.
# torch.as_tensor converts in one step; the legacy torch.Tensor(...) constructor
# is meant for allocating new float tensors, not for wrapping existing ones.
test_x = torch.as_tensor(train_features, dtype=torch.float32)
test_x.shape

torch.Size([64, 128, 4])

In [37]:
# Smoke-test one LSTM layer on random input laid out as (batch, seq_len, features)
test_input = torch.randn(64, 1, 4)

test_layer = nn.LSTM(4, 64, batch_first=True, num_layers=1)

# Zero initial hidden and cell states, both tracked by autograd
h_0, c_0 = (torch.zeros(1, 64, 64, requires_grad=True) for _ in range(2))

out, (hn, cn) = test_layer(test_input, hx=(h_0, c_0))

In [65]:
class LSTM_Model(nn.Module):
    """LSTM encoder followed by a linear head applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per sequence.
    """

    def __init__(self, input_size=4, hidden_size=128, num_layers=1, output_size=4):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Predict one output vector per input sequence.

        Args:
            x: Batched input of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        # No initial state is passed, so nn.LSTM starts from zero hidden and
        # cell states that match the input's device and dtype. Every call
        # starts fresh; no state is carried over between batches.
        out, _ = self.lstm(x)

        # Decode the hidden state of the last time step
        return self.fc(out[:, -1, :])

In [68]:
# Inputs and targets of the sampled mini-batch, cast to float32.
# torch.as_tensor replaces the legacy torch.Tensor(...) constructor.
X = torch.as_tensor(train_features, dtype=torch.float32)
y = torch.as_tensor(train_predition, dtype=torch.float32)

# Hyperparameters

model = LSTM_Model(num_layers=1)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)


num_epochs = 100

# NOTE: every iteration is one optimizer step on the same mini-batch (X, y),
# not a pass over the whole DataLoader.
model.train()  # training mode only needs to be set once, not per iteration
for i in range(num_epochs):
    optimizer.zero_grad()
    output = model(X)
    # Compare output and y as-is: squeezing would drop the batch axis for a
    # batch of one (or the feature axis for a single output) and make the loss
    # broadcast mismatched shapes.
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()

    if (i + 1) % 10 == 0:
        print(f'Epoch [{i+1}/{num_epochs}], Loss: {loss.item():.7f}')

Epoch [10/100], Loss: 0.0004037
Epoch [20/100], Loss: 0.0002688
Epoch [30/100], Loss: 0.0002163
Epoch [40/100], Loss: 0.0002016
Epoch [50/100], Loss: 0.0001913
Epoch [60/100], Loss: 0.0001843
Epoch [70/100], Loss: 0.0001774
Epoch [80/100], Loss: 0.0001706
Epoch [90/100], Loss: 0.0001636
Epoch [100/100], Loss: 0.0001568
