In [1]:
# This notebook applies a Long Short-Term Memory (LSTM) network to multivariate time series data to predict a corporation's closing stock price from the previous 60 days of stock prices.

# Import the libraries
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
# Apply the FiveThirtyEight theme to every matplotlib figure in this notebook
plt.style.use('fivethirtyeight')

# Select the compute device: first CUDA GPU when one is available, otherwise CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Download the daily AAPL quote history for the last five years
# NOTE(review): the 'yahoo' source of pandas_datareader has reportedly been
# unreliable after upstream API changes — confirm this call still succeeds.
# window start: five years before the current timestamp
start = pd.to_datetime('today') - pd.DateOffset(years=5)
# window end: today's date formatted as YYYY-MM-DD
today = pd.to_datetime('today').strftime('%Y-%m-%d')
df = web.DataReader(
    'AAPL',
    data_source='yahoo',
    start=start,
    end=today,
)

In [5]:
# prepare data
target_data = "Close"
forecast_lead = 1
target = f"{target_data}_t+{forecast_lead}"

# Feature columns: every column except the raw close and the shifted target.
# Excluding `target` too keeps the label out of the inputs when this cell is
# executed again after the target column has already been added to `df`.
feature = list(df.columns.difference([target_data, target]))

# Add the target (the close value `forecast_lead` rows ahead) and drop the
# trailing rows that have no future value. The guard makes the cell safe to
# re-run: without it every execution would shift again and drop another row.
if target not in df.columns:
    df = df.assign(**{target: df[target_data].shift(-forecast_lead)})
    # len(df) - forecast_lead (rather than -forecast_lead) keeps all rows when
    # forecast_lead is 0 instead of producing an empty frame.
    df = df.iloc[:len(df) - forecast_lead]

# split data: the first 80% of rows (in their existing order, no shuffling)
# form the training set, the remainder the test set
train_size = int(len(df) * 0.8)
test_size = len(df) - train_size
train, test = df.iloc[:train_size], df.iloc[train_size:]

In [8]:
# normalise data
# Fit the scaler on the training split only and reuse it for the test split,
# so no statistics from the test rows influence the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
# fit_transform/transform return plain NumPy arrays; wrap the results back
# into DataFrames so the column labels survive and later code can still
# select columns by name (e.g. dataframe[features]).
train = pd.DataFrame(scaler.fit_transform(train), index=train.index, columns=train.columns)
test = pd.DataFrame(scaler.transform(test), index=test.index, columns=test.columns)


In [None]:
# Create custom dataset
class SequenceDataset(torch.utils.data.Dataset):
    """Sliding-window dataset over a table of feature and target columns.

    Item ``idx`` is the pair ``(x, y)`` where ``x`` holds the ``seq_len``
    consecutive feature rows ``idx .. idx + seq_len - 1`` and ``y`` is the
    target value stored at row ``idx + seq_len``.

    Args:
        dataframe: Table supporting ``dataframe[features].values`` and
            ``dataframe[target].values`` (e.g. a pandas DataFrame).
        target: Name of the target column.
        features: List of feature column names.
        seq_len: Number of consecutive rows in each input window.
    """

    def __init__(self, dataframe, target, features, seq_len = 60):
        self.dataframe = dataframe
        self.target = target
        self.seq_len = seq_len
        # Materialise both column groups once as float32 tensors
        self.x = torch.tensor(dataframe[features].values, dtype=torch.float32)
        self.y = torch.tensor(dataframe[target].values, dtype=torch.float32)

    def __len__(self):
        """Return the number of complete (window, label) pairs.

        The last ``seq_len`` rows cannot start a window that still has a
        label row after it, so they are not counted; a table shorter than
        ``seq_len`` yields an empty dataset.
        """
        return max(0, self.x.shape[0] - self.seq_len)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for window ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            idx += size
        if not 0 <= idx < size:
            raise IndexError(f"index {idx} out of range for dataset of length {size}")
        x = self.x[idx:idx + self.seq_len]
        # NOTE(review): the label is taken from the row right after the window.
        # If the caller's target column is already shifted by the forecast
        # lead, the row aligned with the window's last step would be
        # idx + seq_len - 1 — confirm which alignment is intended.
        y = self.y[idx + self.seq_len]
        return x, y