In [24]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import yfinance as yf
from dataset import SlidingWindowTransformer
from models.lstm import LSTMForecaster
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from training_arguments import TrainingArguments

### Load Dataset

In [2]:
# Columns kept from the downloaded frame, in the order they are selected below.
col = ['Adj Close', 'Open', 'High', 'Low', "Close", "Volume"]

# auto_adjust=False is passed explicitly because the selection below needs a
# separate 'Adj Close' column; newer yfinance releases default to
# auto_adjust=True, which folds the adjustment into 'Close' and omits it.
AMZN = yf.download(
    'AMZN',
    start='2013-01-01',
    end='2019-12-31',
    auto_adjust=False,
    progress=False,
)

# Reuse `col` instead of repeating the list; every value is rounded to 2 decimals.
all_data = AMZN[col].round(2)
all_data

Unnamed: 0_level_0,Adj Close,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-01-02,12.87,12.80,12.90,12.66,12.87,65420000
2013-01-03,12.92,12.86,13.04,12.82,12.92,55018000
2013-01-04,12.96,12.88,12.99,12.83,12.96,37484000
2013-01-07,13.42,13.15,13.49,13.13,13.42,98200000
2013-01-08,13.32,13.35,13.45,13.18,13.32,60214000
...,...,...,...,...,...,...
2019-12-23,89.65,89.41,89.65,89.23,89.65,42728000
2019-12-24,89.46,89.69,89.78,89.38,89.46,17626000
2019-12-26,93.44,90.05,93.52,89.97,93.44,120108000
2019-12-27,93.49,94.15,95.07,93.30,93.49,123732000


In [3]:
# Feature columns fed to the model and the single column it has to forecast.
input_cols = ['Adj Close', 'Open', 'High', 'Low', "Close", "Volume"]
output_cols = ['Adj Close']

# shuffle=False keeps row order, so the last 20% of rows form the test set.
train_df, test_df = train_test_split(all_data, test_size=0.2, shuffle=False)

X_train = train_df[input_cols]
y_train = train_df[output_cols]
X_test = test_df[input_cols]
y_test = test_df[output_cols]

In [45]:
# Windowing hyper-parameters; the same values are passed to the forecaster here
# and reused when the sliding-window pipeline step is built.
window_size = 24
forecast_size = 1
step_size = 1

# Seed torch's global RNG so repeated runs start from the same random state.
# NOTE(review): assumes LSTMForecaster draws its randomness from torch's
# global generator - confirm against its implementation.
torch.manual_seed(0)

lstm = LSTMForecaster(
    window_size=window_size,
    forecast_size=forecast_size,
    hidden_size=128,
    num_layers=1,
    # One input channel per feature column, one output channel per target column.
    in_features=len(input_cols),
    out_features=len(output_cols),
    training_args=TrainingArguments(
        criterion=nn.MSELoss,
        optimizer=torch.optim.Adam,
        lr=0.003,
        max_epochs=150,
        batch_size=32,
        # Fall back to CPU so the notebook still runs on machines without CUDA.
        device='cuda' if torch.cuda.is_available() else 'cpu',
    ),
)

In [46]:
# Three chained stages: StandardScaler, then the sliding-window transformer,
# then the LSTM forecaster configured earlier. The step name 'slding' is looked
# up by that exact key later in the notebook, so it is kept as-is.
model = Pipeline(
    [
        ('scaler', StandardScaler()),
        (
            'slding',
            SlidingWindowTransformer(
                window_size=window_size,
                forecast_size=forecast_size,
                step_size=step_size,
            ),
        ),
        ('lstm', lstm),
    ]
)

# Left as the final expression so the fitted pipeline is the cell's output.
model.fit(X=X_train, y=y_train)

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1       47.7532     5043.0654  0.2333
      2      437.9277     1368.4975  0.0817
      3      132.2384      690.2832  0.0750
      4       59.9912      171.4048  0.0755
      5       17.5862       83.5046  0.0750
      6        5.5051       74.0088  0.0764
      7        3.1307       79.4372  0.0740
      8        3.6457       96.0618  0.0763
      9        2.5055       89.7869  0.0750
     10        2.2876       92.0399  0.0730
     11        2.1583       94.5210  0.0740
     12        2.1415       97.3484  0.0740
     13        2.2015      100.0710  0.0750
     14        2.2774      102.3051  0.0750
     15        2.3297      104.0382  0.0740
     16        2.3493      105.6164  0.0740
     17        2.3537      107.3811  0.0762
     18        2.3642      109.4292  0

In [47]:
# Forecasts for the held-out rows, produced by the fitted pipeline.
y_pred = model.predict(X=X_test)

# Run the raw test targets through the pipeline's sliding-window step; only
# the second returned value is kept.
# NOTE(review): presumably this yields one target window per forecast - confirm.
_, y_true = model.named_steps['slding'].transform(X=None, y=y_test)

In [48]:
# zip() stops at the shorter input, so a mismatch between the number of target
# windows and the number of forecasts would silently drop samples from the
# metric. Fail loudly instead.
assert len(y_true) == len(y_pred), (
    f"window count mismatch: {len(y_true)} targets vs {len(y_pred)} predictions"
)

# One MSE per (target window, forecast) pair, then the unweighted mean of those.
mse_list = [mean_squared_error(true, pred) for true, pred in zip(y_true, y_pred)]
average_mse = np.mean(mse_list)

print(average_mse)

23.919762
