# Parameter Tuning - LSTM

LSTM (Long Short-Term Memory) is a specialized type of Recurrent Neural Network (RNN).

## Libraries and Data

In [None]:
# Install libraries
!pip install pyyaml==5.4.1
!pip install darts

In [18]:
# Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import ParameterGrid
from darts.timeseries import TimeSeries
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.dataprocessing.transformers import Scaler
from darts.models import RNNModel
from sklearn.metrics import mean_squared_error

In [4]:
# Data: read the CSV, using its first column as a date-parsed index
data_path = '../Data/nyc-data.csv'
df = pd.read_csv(data_path, index_col=0, parse_dates=True)
df

Unnamed: 0_level_0,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,720.000885,0,0,0,3.68,41.305
2015-01-02,581.276773,0,0,0,4.73,131.574
2015-01-03,754.117039,0,0,0,7.23,162.700
2015-01-04,622.252774,0,0,0,10.96,160.281
2015-01-05,785.373319,0,0,0,6.92,51.077
...,...,...,...,...,...,...
2020-12-27,685.915026,0,0,0,2.89,38.674
2020-12-28,998.051170,0,0,0,8.83,166.712
2020-12-29,847.123399,0,0,0,3.48,161.865
2020-12-30,857.521043,0,0,0,5.97,179.634


In [5]:
# Rename the target column from 'Demand' to 'y'
column_map = {'Demand': 'y'}
df = df.rename(columns=column_map)
df.head(1)

Unnamed: 0_level_0,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,720.000885,0,0,0,3.68,41.305


## Prepare for LSTM

In [6]:
# Time Series objects: the target comes from column y, the covariates from
# every column after the first one
target_column = df['y']
covariate_columns = df.iloc[:, 1:]
series = TimeSeries.from_series(target_column)
covariates = TimeSeries.from_dataframe(covariate_columns)

In [7]:
# Time variables: calendar features built on the target's time axis

# Date index starting at the first observation, with one entry per row of df
time_index = pd.date_range(start=series.start_time(),
                           periods=df.shape[0],
                           freq=series.freq_str)

# year (not one-hot encoded)
year_series = datetime_attribute_timeseries(time_index,
                                            attribute='year',
                                            one_hot=False)

# month and weekday (one-hot encoded), derived from year_series
month_series, weekday_series = (
    datetime_attribute_timeseries(year_series, attribute=name, one_hot=True)
    for name in ('month', 'weekday')
)

In [8]:
# Scaling variables - less computationally intensive and better for neural networks
# Two independent scalers: transformer1 is used for the target, transformer2 for the covariates
transformer1, transformer2 = Scaler(), Scaler()

In [9]:
# Fit transformer1 on the target series and keep the scaled result
y_transformed = transformer1.fit_transform(series=series)

In [10]:
# Scale the covariates
# The year component is stacked under a new name so that `covariates` is not
# overwritten; re-running this cell therefore gives the same result every time.
covariates_with_year = covariates.stack(year_series)

# Only the original covariates and the year go through the scaler; the one-hot
# month and weekday components are stacked on afterwards, unscaled.
covariates_transformed = (
    transformer2.fit_transform(covariates_with_year)
    .stack(month_series)
    .stack(weekday_series)
)
covariates_transformed

## LSTM Parameters

* Dropout: mechanism to prevent overfitting
  * Fraction of neurons randomly ignored during training
* N_rnn_layers: number of stacked recurrent (hidden) layers
* Hidden_dim: size of the hidden state, i.e. the number of features in each recurrent layer
* N_epochs: number of complete passes through the training set
* Lr: learning rate, i.e. the step size of each weight update in response to the error
* Training_length: length of the time-series chunks (input and output) used for each training sample
  * Must be > Input_chunk_length (see next point)
* Input_chunk_length: number of past time steps fed to the model

## LSTM

In [15]:
# Baseline LSTM - most of these settings are pulled right from the documentation
lstm_settings = {
    'model': 'LSTM',
    'hidden_dim': 20,
    'n_rnn_layers': 2,
    'dropout': 0,
    'n_epochs': 20,
    'optimizer_kwargs': {'lr': 0.003},
    'random_state': 1502,
    'training_length': 20,
    'input_chunk_length': 15,
    'pl_trainer_kwargs': {'accelerator': 'gpu'},
    'save_checkpoints': True,
}
model = RNNModel(**lstm_settings)

In [16]:
# Train on the scaled target, passing the scaled covariates as future covariates
model.fit(series=y_transformed, future_covariates=covariates_transformed)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 7.2 K 
4 | V             | Linear           | 21    
---------------------------------------------------
7.2 K     Trainable params
0         Non-trainable params
7.2 K     Total params
0.029     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


RNNModel(model=LSTM, hidden_dim=20, n_rnn_layers=2, dropout=0, training_length=20, n_epochs=20, optimizer_kwargs={'lr': 0.003}, random_state=1502, input_chunk_length=15, pl_trainer_kwargs={'accelerator': 'gpu'}, save_checkpoints=True)

## Cross-validation

In [None]:
# CV: historical forecasts starting 180 rows before the end of the data,
# each 31 steps long, moving forward 16 steps at a time and retraining each time
cv_start = len(df) - 180
cv = model.historical_forecasts(series=y_transformed,
                                future_covariates=covariates_transformed,
                                start=cv_start,
                                forecast_horizon=31,
                                stride=16,
                                retrain=True,
                                last_points_only=False)

In [19]:
# Bring each CV forecast back to the original scale and score it against the actuals

# One RMSE per forecast
rmse_cv = []

for forecast in cv:
  # predictions on the original scale, as a pandas Series
  predictions = transformer1.inverse_transform(forecast).pd_series()

  # actuals over exactly the dates covered by the forecast
  first_date, last_date = predictions.index.min(), predictions.index.max()
  actuals = df.y[first_date:last_date]

  # RMSE of this forecast
  rmse_cv.append(np.sqrt(mean_squared_error(actuals, predictions)))

In [21]:
np.mean(rmse_cv)

118.6788162578079

## Parameter Tuning

The tuning is split into two passes to cut down on run time.

The number of epochs (`n_epochs`) is the only parameter searched in both passes.

In [32]:
# Grid - round 1 varies layers, hidden size, dropout and epochs; the rest have a single value
param_grid = dict(
    n_rnn_layers=[2, 4],
    hidden_dim=[20, 30],
    dropout=[0, 0.2],
    n_epochs=[10, 20],
    lr=[0.003],
    training_length=[20],
    input_chunk_length=[15],
)
grid = ParameterGrid(param_grid)
len(grid)

16

In [None]:
# Parameter tuning loop

def lstm_cv_rmse(params, target, covariates, scaler, actuals):
  """Return the mean cross-validated RMSE of an LSTM built from ``params``.

  Args:
    params: dict with the keys 'hidden_dim', 'n_rnn_layers', 'dropout',
      'n_epochs', 'lr', 'training_length' and 'input_chunk_length'.
    target: scaled target TimeSeries used for fitting and forecasting.
    covariates: scaled TimeSeries passed as future covariates.
    scaler: fitted scaler whose inverse_transform maps the forecasts back to
      the original scale.
    actuals: pandas Series of unscaled target values indexed by date.

  Returns:
    The mean, over all CV forecasts, of the per-forecast RMSE computed on the
    original scale.
  """
  # Build model
  model = RNNModel(model='LSTM',
                   hidden_dim=params['hidden_dim'],
                   n_rnn_layers=params['n_rnn_layers'],
                   dropout=params['dropout'],
                   n_epochs=params['n_epochs'],
                   optimizer_kwargs={'lr': params['lr']},
                   random_state=1502,
                   training_length=params['training_length'],
                   input_chunk_length=params['input_chunk_length'],
                   pl_trainer_kwargs={'accelerator': 'gpu'},
                   save_checkpoints=True)

  # Fit model
  # NOTE(review): historical_forecasts below runs with retrain=True, so this
  # initial fit may be redundant - confirm against the installed darts version
  # before removing it.
  model.fit(target, future_covariates=covariates)

  # Cross-validation: forecasts start 180 rows before the end of the data
  forecasts = model.historical_forecasts(target,
                                         future_covariates=covariates,
                                         start=len(actuals) - 180,
                                         forecast_horizon=31,
                                         stride=16,
                                         retrain=True,
                                         last_points_only=False)

  # Score every forecast on the original scale against the matching actuals
  fold_rmse = []
  for forecast in forecasts:
    predictions = scaler.inverse_transform(forecast).pd_series()
    window = actuals.loc[predictions.index.min():predictions.index.max()]
    fold_rmse.append(np.sqrt(mean_squared_error(window, predictions)))

  return np.mean(fold_rmse)


# One mean RMSE per parameter combination, in the same order as `grid`.
# Appending inside a loop keeps the partial results if a run is interrupted.
rmse = []
for params in grid:
  rmse.append(lstm_cv_rmse(params,
                           y_transformed,
                           covariates_transformed,
                           transformer1,
                           df.y))

Took 1 hour to complete.

In [29]:
# Check the results: one row per parameter combination, plus its mean CV RMSE
tuning_results = pd.DataFrame(list(grid)).assign(rmse=rmse)
tuning_results

Unnamed: 0,dropout,hidden_dim,input_chunk_length,lr,n_epochs,n_rnn_layers,training_length,rmse
0,0.0,20,15,0.003,10,2,20,102.676367
1,0.0,20,15,0.003,10,4,20,132.378091
2,0.0,20,15,0.003,20,2,20,118.678816
3,0.0,20,15,0.003,20,4,20,144.606894
4,0.0,30,15,0.003,10,2,20,131.098654
5,0.0,30,15,0.003,10,4,20,126.110347
6,0.0,30,15,0.003,20,2,20,159.317197
7,0.0,30,15,0.003,20,4,20,146.551351
8,0.2,20,15,0.003,10,2,20,98.166068
9,0.2,20,15,0.003,10,4,20,109.075645


In [34]:
# Get best params
# idxmin gives the label of the first row with the lowest RMSE, so exactly one
# row is selected even if several combinations tie. The double brackets keep
# the selection a DataFrame, which is then transposed into a single column.
best_idx = tuning_results.rmse.idxmin()
best_params = tuning_results.loc[[best_idx]].transpose()
best_params

Unnamed: 0,8
dropout,0.2
hidden_dim,20.0
input_chunk_length,15.0
lr,0.003
n_epochs,10.0
n_rnn_layers,2.0
training_length,20.0
rmse,98.166068


In [35]:
# Assign best params
# best_params is indexed by parameter name; its first column is taken as a
# Series so that each lookup returns a scalar. Calling int()/float() directly
# on a one-element Series is deprecated in recent pandas, and it raises if
# best_params ever holds more than one column.
best_row = best_params.iloc[:, 0]
n_rnn_layers = int(best_row['n_rnn_layers'])
hidden_dim = int(best_row['hidden_dim'])
dropout = float(best_row['dropout'])

## Parameter Tuning, Round 2

In [36]:
# Grid - round 2 fixes layers, hidden size and dropout at the values assigned
# from round 1 and varies epochs, learning rate and the two sequence lengths
param_grid = dict(
    n_rnn_layers=[n_rnn_layers],
    hidden_dim=[hidden_dim],
    dropout=[dropout],
    n_epochs=[10, 20],
    lr=[0.003, 0.001],
    training_length=[20, 30],
    input_chunk_length=[15, 20],
)
grid = ParameterGrid(param_grid)
len(grid)

16

In [None]:
# Parameter tuning loop

def lstm_cv_rmse(params, target, covariates, scaler, actuals):
  """Return the mean cross-validated RMSE of an LSTM built from ``params``.

  Args:
    params: dict with the keys 'hidden_dim', 'n_rnn_layers', 'dropout',
      'n_epochs', 'lr', 'training_length' and 'input_chunk_length'.
    target: scaled target TimeSeries used for fitting and forecasting.
    covariates: scaled TimeSeries passed as future covariates.
    scaler: fitted scaler whose inverse_transform maps the forecasts back to
      the original scale.
    actuals: pandas Series of unscaled target values indexed by date.

  Returns:
    The mean, over all CV forecasts, of the per-forecast RMSE computed on the
    original scale.
  """
  # Build model
  model = RNNModel(model='LSTM',
                   hidden_dim=params['hidden_dim'],
                   n_rnn_layers=params['n_rnn_layers'],
                   dropout=params['dropout'],
                   n_epochs=params['n_epochs'],
                   optimizer_kwargs={'lr': params['lr']},
                   random_state=1502,
                   training_length=params['training_length'],
                   input_chunk_length=params['input_chunk_length'],
                   pl_trainer_kwargs={'accelerator': 'gpu'},
                   save_checkpoints=True)

  # Fit model
  # NOTE(review): historical_forecasts below runs with retrain=True, so this
  # initial fit may be redundant - confirm against the installed darts version
  # before removing it.
  model.fit(target, future_covariates=covariates)

  # Cross-validation: forecasts start 180 rows before the end of the data
  forecasts = model.historical_forecasts(target,
                                         future_covariates=covariates,
                                         start=len(actuals) - 180,
                                         forecast_horizon=31,
                                         stride=16,
                                         retrain=True,
                                         last_points_only=False)

  # Score every forecast on the original scale against the matching actuals
  fold_rmse = []
  for forecast in forecasts:
    predictions = scaler.inverse_transform(forecast).pd_series()
    window = actuals.loc[predictions.index.min():predictions.index.max()]
    fold_rmse.append(np.sqrt(mean_squared_error(window, predictions)))

  return np.mean(fold_rmse)


# One mean RMSE per parameter combination, in the same order as `grid`.
# Appending inside a loop keeps the partial results if a run is interrupted.
rmse = []
for params in grid:
  rmse.append(lstm_cv_rmse(params,
                           y_transformed,
                           covariates_transformed,
                           transformer1,
                           df.y))

Took 1 hour to run.

In [38]:
# Check the results: pair each round-2 parameter combination with its mean CV RMSE
grid_rows = list(grid)
tuning_results = pd.DataFrame(grid_rows).assign(rmse=rmse)
tuning_results

Unnamed: 0,dropout,hidden_dim,input_chunk_length,lr,n_epochs,n_rnn_layers,training_length,rmse
0,0.2,20,15,0.003,10,2,20,98.166068
1,0.2,20,15,0.003,10,2,30,107.473737
2,0.2,20,15,0.003,20,2,20,107.214398
3,0.2,20,15,0.003,20,2,30,100.311841
4,0.2,20,15,0.001,10,2,20,127.555618
5,0.2,20,15,0.001,10,2,30,128.123358
6,0.2,20,15,0.001,20,2,20,97.214062
7,0.2,20,15,0.001,20,2,30,107.790203
8,0.2,20,20,0.003,10,2,20,97.073735
9,0.2,20,20,0.003,10,2,30,105.122033


In [39]:
# Get best params
# idxmin gives the label of the first row with the lowest RMSE, so exactly one
# row is selected even if several combinations tie. The double brackets keep
# the selection a DataFrame, which is then transposed into a single column.
best_idx = tuning_results.rmse.idxmin()
best_params = tuning_results.loc[[best_idx]].transpose()
best_params

Unnamed: 0,14
dropout,0.2
hidden_dim,20.0
input_chunk_length,20.0
lr,0.001
n_epochs,20.0
n_rnn_layers,2.0
training_length,20.0
rmse,96.274268


In [40]:
# Write the best-parameter table to CSV
export_path = '../Forecasting-Product/best-params-lstm.csv'
best_params.to_csv(export_path)