# Forecast - LSTM

## Libraries and Data

In [None]:
# Install libraries
!pip install pyyaml==5.4.1
!pip install darts

In [3]:
# Libraries
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from darts.timeseries import TimeSeries
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.dataprocessing.transformers import Scaler
from darts.models import RNNModel



In [4]:
# Data: the historical frame and the future frame are read with the same options
# (first CSV column becomes the index and is parsed as dates)
csv_options = {'index_col': 0, 'parse_dates': True}
df = pd.read_csv('../Data/nyc-data.csv', **csv_options)
future_df = pd.read_csv('../Data/future.csv', **csv_options)
df

Unnamed: 0_level_0,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,720.000885,0,0,0,3.68,41.305
2015-01-02,581.276773,0,0,0,4.73,131.574
2015-01-03,754.117039,0,0,0,7.23,162.700
2015-01-04,622.252774,0,0,0,10.96,160.281
2015-01-05,785.373319,0,0,0,6.92,51.077
...,...,...,...,...,...,...
2020-12-27,685.915026,0,0,0,2.89,38.674
2020-12-28,998.051170,0,0,0,8.83,166.712
2020-12-29,847.123399,0,0,0,3.48,161.865
2020-12-30,857.521043,0,0,0,5.97,179.634


In [5]:
# Preview the future-period frame: Demand is empty there, only the regressors are filled in
future_df

Unnamed: 0_level_0,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01,,0,0,0,5.0,154.221
2021-01-02,,0,0,0,11.11,264.805
2021-01-03,,0,0,0,3.89,115.499
2021-01-04,,0,0,0,6.67,124.65
2021-01-05,,0,0,0,5.56,77.968
2021-01-06,,0,0,0,5.56,234.2
2021-01-07,,0,0,0,6.11,142.041
2021-01-08,,0,0,0,3.89,252.094
2021-01-09,,0,0,0,2.78,100.483
2021-01-10,,0,0,0,6.11,71.6


In [6]:
# Extract regressors: drop the first column (the demand target) from each frame
X_train, X_future = (frame.iloc[:, 1:] for frame in (df, future_df))

In [7]:
# Merge both: historical regressors first, future regressors appended row-wise
X = pd.concat((X_train, X_future), axis=0)
X

Unnamed: 0_level_0,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-01,0,0,0,3.68,41.305
2015-01-02,0,0,0,4.73,131.574
2015-01-03,0,0,0,7.23,162.700
2015-01-04,0,0,0,10.96,160.281
2015-01-05,0,0,0,6.92,51.077
...,...,...,...,...,...
2021-01-27,0,0,0,3.33,39.664
2021-01-28,0,0,0,1.67,195.314
2021-01-29,0,0,0,-2.78,235.894
2021-01-30,0,0,0,1.11,152.752


In [8]:
# Rename the target column to 'y' and show the first row as a check
df = df.rename({'Demand': 'y'}, axis='columns')
df.head(n=1)

Unnamed: 0_level_0,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,720.000885,0,0,0,3.68,41.305


## Prepare for LSTM

In [9]:
# Time Series objects
# Target: the 'y' column of the historical frame
series = TimeSeries.from_series(df['y'])
# Covariates: the regressors for history plus the future period
covariates = TimeSeries.from_dataframe(X)

In [10]:
# Time variables

# Date index starting where the target starts, at the target's frequency,
# with one timestamp per row of X (history + future)
calendar_index = pd.date_range(start=series.start_time(),
                               periods=len(X),
                               freq=series.freq_str)

# year - kept as a single (not one-hot) component
year_series = datetime_attribute_timeseries(calendar_index,
                                            attribute='year',
                                            one_hot=False)

# month - one-hot encoded, built on the time axis of year_series
month_series = datetime_attribute_timeseries(year_series, attribute='month', one_hot=True)

# weekday - one-hot encoded, built on the time axis of year_series
weekday_series = datetime_attribute_timeseries(year_series, attribute='weekday', one_hot=True)

In [11]:
# Scaling variables - less computationally intensive and better for neural networks
# transformer1 is used for the target, transformer2 for the covariates
transformer1, transformer2 = Scaler(), Scaler()

In [12]:
# Normalize (scale) the y: fit the target scaler on the series, then apply it to the same series
y_transformed = transformer1.fit(series).transform(series)

In [13]:
# Scale the covariates
# Each stage gets its own name instead of rebinding `covariates`, so re-running
# this cell does not stack the year component onto an already-extended series.
covariates_with_year = covariates.stack(year_series)  # year is scaled together with the regressors
covariates_scaled = transformer2.fit_transform(covariates_with_year)

# The one-hot month and weekday components are stacked after scaling, so they stay unscaled
covariates_transformed = covariates_scaled.stack(month_series).stack(weekday_series)
covariates_transformed

## LSTM

In [14]:
# Get best parameters: parameter names are in the first CSV column and become the index
parameters = pd.read_csv('best-params-lstm.csv', index_col=0)
parameters

Unnamed: 0,14
dropout,0.2
hidden_dim,20.0
input_chunk_length,20.0
lr,0.001
n_epochs,20.0
n_rnn_layers,2.0
training_length,20.0
rmse,96.274268


In [15]:
# Assign parameters
# Take the single value column by position with .iloc. Indexing a label-indexed
# Series with the integer key [0] relies on a positional fallback that is
# deprecated in pandas 2.x.
best_params = parameters.iloc[:, 0]

# Values are stored as floats in the CSV; cast to the type each model argument expects
dropout = float(best_params.loc['dropout'])
hidden_dim = int(best_params.loc['hidden_dim'])
input_chunk_length = int(best_params.loc['input_chunk_length'])
lr = float(best_params.loc['lr'])
n_epochs = int(best_params.loc['n_epochs'])
n_rnn_layers = int(best_params.loc['n_rnn_layers'])
training_length = int(best_params.loc['training_length'])

In [16]:
# Model - LSTM configured with the tuned values read from best-params-lstm.csv;
# random_state is fixed so repeated runs start from the same seed
model = RNNModel(
    model='LSTM',
    hidden_dim=hidden_dim,
    n_rnn_layers=n_rnn_layers,
    dropout=dropout,
    n_epochs=n_epochs,
    optimizer_kwargs={'lr': lr},
    random_state=1502,
    training_length=training_length,
    input_chunk_length=input_chunk_length,
    # 'auto' uses a GPU when one is available and otherwise falls back to CPU,
    # so the notebook also runs on machines without CUDA
    pl_trainer_kwargs={'accelerator': 'auto'},
    save_checkpoints=True,
)

In [17]:
# Fit the model to the data: scaled target plus the scaled/stacked covariates,
# which also cover the future period
model.fit(series=y_transformed, future_covariates=covariates_transformed)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


RNNModel(model=LSTM, hidden_dim=20, n_rnn_layers=2, dropout=0.2, training_length=20, n_epochs=20, optimizer_kwargs={'lr': 0.001}, random_state=1502, input_chunk_length=20, pl_trainer_kwargs={'accelerator': 'gpu'}, save_checkpoints=True)

## Predictions and exporting

In [19]:
# Predictions: forecast one step per row of future_df (still on the scaled target)
predictions_lstm = model.predict(future_covariates=covariates_transformed,
                                 n=future_df.shape[0])
# Undo the target scaling, convert to a pandas Series and name it 'lstm'
predictions_lstm = transformer1.inverse_transform(predictions_lstm).pd_series().rename('lstm')
predictions_lstm

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Date
2021-01-01    734.494726
2021-01-02    871.013356
2021-01-03    779.134781
2021-01-04    933.938906
2021-01-05    791.818786
2021-01-06    966.589101
2021-01-07    846.743179
2021-01-08    756.869905
2021-01-09    891.956623
2021-01-10    804.699124
2021-01-11    958.335480
2021-01-12    804.316250
2021-01-13    968.892628
2021-01-14    861.867117
2021-01-15    756.474567
2021-01-16    902.502691
2021-01-17    803.706220
2021-01-18    954.882283
2021-01-19    806.310295
2021-01-20    979.746389
2021-01-21    872.214015
2021-01-22    769.074376
2021-01-23    916.815816
2021-01-24    824.058235
2021-01-25    967.588317
2021-01-26    825.811454
2021-01-27    991.230737
2021-01-28    888.472970
2021-01-29    785.796669
2021-01-30    927.805983
2021-01-31    831.993225
Freq: D, Name: lstm, dtype: float64

In [20]:
# Exporting
# Create the output folder first so the write does not fail when it is missing
export_path = Path('Ensemble/predictions-lstm.csv')
export_path.parent.mkdir(parents=True, exist_ok=True)
predictions_lstm.to_csv(export_path)