# Main file for LSTM regression model

In [None]:
%%capture
# import libraries
import pandas as pd
import numpy as np
import math
import warnings
warnings.filterwarnings('ignore')
from regression_helper_functions_ import ts_data, train_test, train_validation, plot_acutal_predict, model_performance, rmse, convert 
from regression_helper_functions_ import grid_search_ses, grid_search_gbr, gbr, grid_search_rf, grid_search_sarima, sarima, arima_predict, grid_search_arima
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
from regression_helper_functions_ import plot_acutal_predict
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.arima_model import ARIMA
import timeit
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.optimizers import SGD
from keras.layers import Bidirectional

In [None]:
def performance_table_lstm(result_dict, countries=('US', 'CA')):
    """Flatten per-country LSTM results into a single performance table.

    Parameters
    ----------
    result_dict : dict
        Maps a country code to a dict with the keys 'Frequency', 'MODEL',
        'RUN_TIME', 'Prediction_Window', 'MAPE', 'RMSE', 'NRMSE' and 'ND'.
        Each value is assigned as a DataFrame column, so the sequences of
        one country are expected to have the same length.
    countries : iterable of str, optional
        Country codes to include, in output order. Defaults to
        ``('US', 'CA')``, the two countries this function has always
        reported, so existing callers are unaffected.

    Returns
    -------
    pandas.DataFrame
        One row per recorded run with the columns 'Frequency', 'MODEL',
        'RUN_TIME', 'Prediction Window', 'MAPE', 'RMSE', 'NRMSE', 'ND' and
        'Country'. Each country keeps its own row index (the frames are
        concatenated without re-indexing). An empty `countries` selection
        yields an empty table with the same columns.

    Raises
    ------
    KeyError
        If a requested country or one of the metric keys is missing.
    """
    # (output column name, key in the per-country result dict); note that the
    # prediction window is stored with an underscore but reported with a space.
    column_keys = (
        ('Frequency', 'Frequency'),
        ('MODEL', 'MODEL'),
        ('RUN_TIME', 'RUN_TIME'),
        ('Prediction Window', 'Prediction_Window'),
        ('MAPE', 'MAPE'),
        ('RMSE', 'RMSE'),
        ('NRMSE', 'NRMSE'),
        ('ND', 'ND'),
    )

    frames = []
    for country in countries:
        country_results = result_dict[country]
        frame = pd.DataFrame()
        for column, key in column_keys:
            frame[column] = country_results[key]
        frame['Country'] = country
        frames.append(frame)

    if not frames:
        # pd.concat refuses an empty list; return an empty, well-formed table.
        return pd.DataFrame(columns=[column for column, _ in column_keys] + ['Country'])

    return pd.concat(frames)

In [None]:
%%capture
def split_series(t_series, datetime, previous_steps):
    """Cut a series into autoregressive samples.

    Every sample pairs a window of `previous_steps` consecutive values with
    the value that immediately follows it and the matching entry of
    `datetime`.

    Parameters
    ----------
    t_series : sequence
        Observations, indexable and sliceable by position.
    datetime : sequence
        Labels aligned with `t_series` (same positions).
    previous_steps : int
        Number of past observations in each input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets, labels)`` where ``windows[k]`` is
        ``t_series[k:k + previous_steps]``, ``targets[k]`` is
        ``t_series[k + previous_steps]`` and ``labels[k]`` is
        ``datetime[k + previous_steps]``. All three are empty when the
        series is too short to hold a single window plus its target.
    """
    series_len = len(t_series)

    # Position of the value to predict for each usable window: a window is
    # usable only while its target still lies inside the series.
    target_positions = [
        start + previous_steps
        for start in range(series_len)
        if start + previous_steps < series_len
    ]

    windows = [t_series[pos - previous_steps:pos] for pos in target_positions]
    targets = [t_series[pos] for pos in target_positions]
    labels = [datetime[pos] for pos in target_positions]

    return np.array(windows), np.array(targets), np.array(labels)

In [None]:
# helper function to split the series into train-test sets for evaluating LSTM model 
def train_test_lstm(x_train, y_label, dt, train_amount, n):
    """Split windowed data into a training part and the next `n` targets.

    Parameters
    ----------
    x_train : sequence
        Input windows; the first `train_amount` of them are returned.
    y_label : sequence
        Targets aligned with `x_train`.
    dt : sequence
        Labels aligned with `y_label`.
    train_amount : int
        Number of leading samples that form the training part.
    n : int
        Number of samples, directly after the training part, to hold out.

    Returns
    -------
    tuple
        ``(x_train[0:train_amount], y_label[train_amount:train_amount + n],
        dt[train_amount:train_amount + n])``.
    """
    training_part = slice(0, train_amount)
    held_out_part = slice(train_amount, train_amount + n)
    return x_train[training_part], y_label[held_out_part], dt[held_out_part]

In [None]:
%%capture
# implement the LSTM model based on the selected model parameters 
def lstm(steps,nodes,activation,optimize):
    """Build and compile the bidirectional LSTM regression network.

    The network stacks, in order: a Bidirectional wrapper around an LSTM
    layer, a Dense layer with `nodes` units, and a single-unit Dense output.

    Parameters
    ----------
    steps : int
        Window length; given to the LSTM layer as ``input_shape=(steps, 1)``.
    nodes : int
        Unit count of both the LSTM layer and the hidden Dense layer.
    activation : str
        Activation used by the LSTM layer and the hidden Dense layer.
    optimize : str
        Optimizer handed to ``compile``.

    Returns
    -------
    keras Sequential model compiled with the 'mse' loss (not yet fitted).
    """
    recurrent_layer = Bidirectional(
        LSTM(nodes, activation=activation, input_shape=(steps, 1))
    )
    hidden_layer = Dense(nodes, activation=activation)
    output_layer = Dense(1)

    network = Sequential()
    for layer in (recurrent_layer, hidden_layer, output_layer):
        network.add(layer)

    network.compile(optimizer=optimize, loss='mse')
    return network

In [None]:
# helper function to grid search for optimal parameters
def grid_search_lstm(data, param_grid=None):
    """Exhaustively search LSTM settings and return the best combination.

    Every combination of the grid is scored with `model_error`; the one with
    the lowest error wins (the first one encountered wins ties).

    Parameters
    ----------
    data :
        Training series, passed unchanged to `model_error`.
    param_grid : dict, optional
        Overrides for the default search space. Allowed keys are 'n_input',
        'n_nodes', 'n_epochs', 'n_activation' and 'n_optimize', each mapping
        to a list of candidate values. Keys that are left out keep their
        default candidates, so calling without this argument searches the
        same 24 combinations as before.

    Returns
    -------
    list or None
        ``[n_input, n_nodes, n_epochs, n_activation, n_optimize]`` of the
        best configuration, or None when no configuration produced an error
        below infinity (for example, when every error is NaN).

    Raises
    ------
    ValueError
        If `param_grid` contains a key that is not part of the search space.
    """
    # Local import so the function does not depend on the notebook's import cell.
    from itertools import product

    # define scope of configs
    grid = {
        'n_input': [7, 14],
        'n_nodes': [40, 80, 120],
        'n_epochs': [100, 200],
        'n_activation': ['relu'],
        'n_optimize': ['Adam', 'Adamax'],
    }
    if param_grid:
        unknown_keys = sorted(set(param_grid) - set(grid))
        if unknown_keys:
            raise ValueError('Unknown grid search parameter(s): %s' % unknown_keys)
        grid.update(param_grid)

    minError, optParam = float('inf'), None
    # product() varies the right-most factor fastest, i.e. the same visiting
    # order as nesting the loops input > nodes > epochs > activation > optimizer.
    for combination in product(grid['n_input'], grid['n_nodes'], grid['n_epochs'],
                               grid['n_activation'], grid['n_optimize']):
        config_sample = list(combination)
        RMSEerror = model_error(data, config_sample)
        print('Model fitted on:', config_sample, 'The error is:', RMSEerror)
        if RMSEerror < minError:
            minError, optParam = RMSEerror, config_sample
    return optParam

In [None]:
%%capture
# helper function to calculate the error for each grid search iteration 
def model_error(data, configuration):
    """Fit one LSTM configuration and return its RMSE on the fitted samples.

    The series is windowed with `split_series`, a model built by `lstm` is
    fitted on all windows, and the error is computed on predictions for those
    same windows (an in-sample error).

    Parameters
    ----------
    data :
        Series-like object; its ``.values`` and ``.index`` are windowed.
    configuration : sequence
        ``(n_input, n_nodes, n_epochs, n_activation, n_optimize)``.

    Returns
    -------
    The value returned by ``rmse(targets, predictions)``.
    """
    n_input, n_nodes, n_epochs, n_activation, n_optimize = configuration

    model = lstm(n_input, n_nodes, n_activation, n_optimize)

    # The labels returned by split_series are not needed for scoring.
    tt, ts, _ = split_series(data.values, data.index, n_input)

    # Add the trailing feature axis: (samples, steps) -> (samples, steps, 1).
    tt = tt.reshape((tt.shape[0], tt.shape[1], 1))

    model.fit(tt, ts, epochs=n_epochs,  verbose=0)

    y_ = list()
    for window in tt:
        x_test_instance = window.reshape((1,n_input,1))
        prediction = model.predict(x_test_instance,verbose=0)
        # Extract the single predicted value explicitly: calling int() on an
        # array with ndim > 0 is deprecated in NumPy. The truncation to an
        # integer is kept as in the original implementation.
        y_.append(int(np.asarray(prediction).item()))

    error = rmse(ts,y_)
    print('Model fitted on:', configuration)
    return error

In [None]:
# main implementation for LSTM
#
# For every country and frequency: load the series, grid-search the LSTM
# settings on the first 70% of it, then for every prediction horizon run a
# walk-forward evaluation (refit, predict `p` steps, extend the training set
# by `p`) and record run time and error metrics. The combined table is written
# to a file at the end.

# evaluate LSTM using daily (24H) data
FREQ = ['24H']
PREDICT_HORIZON = [1, 6, 12, 24, 72]
CONT = ['US','CA','GB','ENTIRE','NORTH_AMERICA']

# One result bucket per country; each list gains one entry per evaluated
# (frequency, prediction window) pair.
MODEL_RESULTS = dict()
for c in CONT:
    MODEL_RESULTS[c] = {'MODEL':[],'RUN_TIME':[], 'Frequency':[], 'Prediction_Window':[], 'RMSE':[], 'NRMSE':[], 'MAPE':[], 'ND':[]}

for c in CONT:
    for f in FREQ:
        # Loading, grid search and windowing do not depend on the prediction
        # horizon, so they run once per (country, frequency) instead of once
        # per horizon.
        # NOTE(review): assumes ts_data returns the same series on every call
        # with identical arguments - confirm.
        x = ts_data(country = c, category = 'total', frequency = f, model = None)
        #grid search using 70% of the original data
        train_data_gs, test_data_gs = train_test(x,int(len(x)*0.7),0)
        n_input, n_nodes, n_epochs, n_activation, n_optimize = grid_search_lstm(train_data_gs)

        lstm_x, lstm_y, lstm_dt = split_series(x.values, x.index, n_input)

        for p in PREDICT_HORIZON:
            # train_s grows during the walk-forward loop, so reset it per horizon.
            train_s = int(len(lstm_x)*0.7)
            test_s = len(lstm_x) - train_s
            t = list()    # actual values
            t_ = list()   # predicted values
            idx = list()  # labels of the predicted points

            START_TIME = timeit.default_timer()

            # test_s // p full horizons fit into the test span, so
            # train_s + p never exceeds len(lstm_x) inside this loop and no
            # extra bounds check is needed.
            for _ in range(test_s // p):
                train, test, _test_dates = train_test_lstm(lstm_x, lstm_y, lstm_dt, train_s, p)

                # train model using training sample
                train = train.reshape((train.shape[0], train.shape[1], 1))
                model = lstm(n_input, n_nodes, n_activation, n_optimize)
                model.fit(train, lstm_y[0:train_s], epochs = n_epochs, verbose=0)

                # predict until completion of prediction horizon
                for j in range(p):
                    x_test_instance = lstm_x[train_s+j]
                    x_test_instance = x_test_instance.reshape((1,n_input,1))

                    t.append(test[j])
                    prediction = model.predict(x_test_instance,verbose=0)
                    # Explicit scalar extraction: int() on an array with
                    # ndim > 0 is deprecated in NumPy.
                    t_.append(int(np.asarray(prediction).item()))

                    idx.append(lstm_dt[train_s+j])

                # slide the training window forward by one horizon
                train_s += p

            END_TIME = timeit.default_timer()
            TIME = convert(END_TIME-START_TIME)

            y_real = pd.DataFrame(t,index=idx)
            y_pred = pd.DataFrame(t_,index=idx)

            title = str('LSTM'+' '+f+' '+c+' ')
            plot_acutal_predict(y_real,y_pred,title)

            # Do not unpack into a variable called `rmse`: that would replace
            # the imported rmse() function that model_error() calls during the
            # next grid search.
            mape_value, rmse_value, nrmse_value, nd_value = model_performance(y_real, y_pred)

            MODEL_RESULTS[c]['MODEL'].append('LSTM')
            MODEL_RESULTS[c]['RUN_TIME'].append(TIME)
            MODEL_RESULTS[c]['Frequency'].append(f)
            MODEL_RESULTS[c]['Prediction_Window'].append(p)
            MODEL_RESULTS[c]['RMSE'].append(rmse_value)
            MODEL_RESULTS[c]['MAPE'].append(mape_value)
            MODEL_RESULTS[c]['NRMSE'].append(nrmse_value)
            MODEL_RESULTS[c]['ND'].append(nd_value)

# NOTE(review): called with a single argument, performance_table_lstm only
# tabulates the 'US' and 'CA' results; the other countries in CONT are
# evaluated above but do not reach the exported file - confirm this is intended.
TABLE_LSTM = performance_table_lstm(MODEL_RESULTS)
filename = 'Regression_Performance_LSTM'
TABLE_LSTM.to_csv(filename, index = False)