In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import math
import warnings
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from statsmodels.tools.sm_exceptions import ConvergenceWarning
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv1D, Dense, Dropout, Flatten, LSTM, MaxPooling1D
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from numpy import array
from keras.layers import Bidirectional
from keras.layers import TimeDistributed
from keras.layers import Flatten
from keras.layers import ConvLSTM2D
from keras.layers import GRU

In [None]:
# How to use this notebook for univariate time series analysis:
# 1) The time_dependent_variable and time_column columns must not contain missing values
#    or categorical (string) data.
# 2) Fill in the five settings below: file_name (CSV only), time_dependent_variable, time_column,
#    the date-time format (frmt) and the resample grain (X). Then run every cell (Cell ---> Run All).
# 3) Example:
#   file_name               = "JetRail Avg Hourly Traffic Data - 2012-2013.csv"
#   time_dependent_variable = "Count"    (column name in your dataset)
#   time_column             = "Datetime" (column name in your dataset)
#   frmt                    = "%Y-%m-%d"
#   X                       = "D"

# Path of the CSV file to load.
file_name = "cta_ridership.csv"
# Column holding the values to forecast.
time_dependent_variable = "total_rides"
# Column holding the timestamps.
time_column = "service_date"
# Format string used to parse time_column.
frmt =  '%Y-%m-%d'
# Resample rule passed to DataFrame.resample ("D" in the example above).
X = "D"

In [None]:
def data(time_column, file_name, frmt='%Y-%m-%d %H:%M:%S', X= "D", value_column=None):
    """Load a CSV and resample the target column to a regular time grain.

    Parameters
    ----------
    time_column : str
        Name of the column holding the timestamps.
    file_name : str
        Path of the CSV file to read.
    frmt : str
        Format string used to parse ``time_column``.
    X : str
        Resample rule passed to ``DataFrame.resample``; every bin is averaged.
    value_column : str, optional
        Name of the target column. When omitted, the notebook-level
        ``time_dependent_variable`` is used (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        Two columns, ``time_column`` followed by the target column, with one
        row per resample bin.
    """
    if value_column is None:
        value_column = time_dependent_variable

    raw = pd.read_csv(file_name)
    # Work on a copy so the datetime conversion below does not write into a
    # slice of the frame returned by read_csv.
    series = raw[[time_column, value_column]].copy()
    series[time_column] = pd.to_datetime(series[time_column], format=frmt)

    # set_index moves the time column into the index instead of duplicating it,
    # so only the target is averaged and reset_index cannot collide with an
    # already existing column of the same name.
    resampled = series.set_index(time_column).resample(X).mean()
    return resampled.reset_index()
# Load the configured CSV and resample it to the configured grain.
df = data(time_column=time_column, file_name=file_name, frmt=frmt, X=X)

In [None]:
# Metrics
def timeseries_evaluation_metrics_func(y_true, y_pred):
    """Print MSE, MAE, RMSE, MAPE and R2 for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, one per observed value.

    Returns
    -------
    None
        The metrics are printed, not returned.

    Notes
    -----
    MAPE divides by ``y_true``; observed values equal to zero therefore
    produce ``inf``/``nan`` in that metric.
    """
    def mean_absolute_percentage_error(y_true, y_pred):
        # Flatten both inputs so that an (n,) vector and an (n, 1) column are
        # compared element by element instead of being broadcast to (n, n).
        actual = np.asarray(y_true).ravel()
        predicted = np.asarray(y_pred).ravel()
        return np.mean(np.abs((actual - predicted) / actual)) * 100

    # Computed once and reused for both the MSE and RMSE lines.
    mse = metrics.mean_squared_error(y_true, y_pred)

    print('Evaluation metric results:-')
    print(f'MSE is : {mse}')
    print(f'MAE is : {metrics.mean_absolute_error(y_true, y_pred)}')
    print(f'RMSE is : {np.sqrt(mse)}')
    print(f'MAPE is : {mean_absolute_percentage_error(y_true,y_pred)}')
    print(f'R2 is : {metrics.r2_score(y_true, y_pred)}',end='\n\n')

In [None]:
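# This splits the data into train, test and validation data with proportions 72%, 10% and 18% respectively when called with test_train_split=0.9 and train_validation_split=0.8 (as in the call below); the default arguments (0.9 and 0.7) give 63%, 10% and 27%.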
#def train_test_split_perc(df, test_train_split= 0.9, train_validation_split = 0.7):
    # train_test_size = math.floor(test_train_split*len(df)) #(70% Dataset)
    # train_validation = df.head(train_test_size)
    # train_validation_size = math.floor(train_validation_split*len(train_validation))
    # train =  train_validation.head(train_validation_size)
    # validation = train_validation.tail(len(train_validation) - train_validation_size)
    # test  = df.tail(len(df) - train_test_size)
    # return train,validation,test
    
# train,validation,test = train_test_split_perc(df, test_train_split= 0.9, train_validation_split = 0.8)

In [None]:
def train_test_split_perc(df, split= 0.7):
    """Split a frame, in row order, into a leading train part and a trailing test part.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows in the order they should be split; nothing is shuffled.
    split : float
        Fraction of rows assigned to the train part (the row count is floored).

    Returns
    -------
    tuple
        ``(train, test)``: the first ``floor(split * len(df))`` rows and the
        remaining rows.
    """
    n_rows = len(df)
    n_train = math.floor(split * n_rows)
    return df.head(n_train), df.tail(n_rows - n_train)
    
# Keep the first 80% of the rows for training and the rest for testing.
train, test = train_test_split_perc(df=df, split=0.8)

In [None]:
# Standardization or Normalization
# For neural networks to converge quicker, it is helpful to scale the values.
# For example, each feature might be transformed to have a mean of 0 and std. dev. of 1.
# You are working with a mix of features, input timesteps, output horizon, etc.
# which don't work out-of-the-box with common scaling utilities.
# So, here are a couple wrappers to handle scaling and inverting the scaling.

def scale(train_data, test_data, standard):
    """Scale the target column of the train and test frames.

    The scaler is fitted on the training values only and then applied to the
    test values.

    Parameters
    ----------
    train_data, test_data : pandas.DataFrame
        Frames containing the ``time_dependent_variable`` column.
    standard : bool
        ``True`` selects ``StandardScaler``; any other value selects
        ``MinMaxScaler``.

    Returns
    -------
    tuple
        ``(train, test)`` as scaled single-column 2-D arrays.
    """
    train_values = train_data[time_dependent_variable].values.reshape(-1, 1)
    test_values = test_data[time_dependent_variable].values.reshape(-1, 1)

    scaler = StandardScaler() if standard == True else MinMaxScaler()

    scaled_train_values = scaler.fit_transform(train_values)
    scaled_test_values = scaler.transform(test_values)
    return scaled_train_values, scaled_test_values

# Standardize the target using statistics fitted on the training split.
scaled_train, scaled_test = scale(train_data=train, test_data=test, standard=True)

In [None]:
n_steps = 5 # number of lagged observations that form one input window
horizon = 1 # number of future values the model predicts for each window
# For data_prep below: n_steps ---> number of lags used as model input.
#                      horizon ---> number of values to predict, e.g. [20,30,40]-->[50] predicts a single value of y.
def data_prep(dataset, n_steps, horizon):
  """Cut a single-column series into sliding input windows and their targets.

  Parameters
  ----------
  dataset : sequence of rows
      Only the first element of every row is used.
  n_steps : int
      Length of each input window.
  horizon : int
      Number of values following each window that form its target.

  Returns
  -------
  tuple of numpy.ndarray
      ``(X, y)`` with one row per window; both are empty arrays when the
      series is too short for a single window.
  """
  series = [row[0] for row in dataset]
  total = len(series)

  windows, targets = [], []
  for start in range(total):
    stop = start + n_steps
    # Stop as soon as the target would run past the end of the series.
    if stop + horizon > total:
      break
    windows.append(series[start:stop])
    targets.append(series[stop:stop + horizon])
  return array(windows), array(targets)

# Build (window, target) pairs for each split; the horizon is fixed to 1 here.
X_train, Y_train = data_prep(dataset=scaled_train, n_steps=n_steps, horizon=1)
X_test, Y_test = data_prep(dataset=scaled_test, n_steps=n_steps, horizon=1)
#X_validation, Y_validation = data_prep(scaled_validation, n_steps, 1)

In [None]:
# Preview the first few training targets rather than dumping the whole array.
Y_train[:5]

In [None]:
def Unscale(prediction_data, test_data, standard):
    """Map scaled predictions and scaled targets back to the original units.

    A fresh scaler is fitted on the ``time_dependent_variable`` column of the
    notebook-level ``train`` frame and used to invert both inputs.

    Parameters
    ----------
    prediction_data : array-like
        Scaled values; their inverse is returned first (``y_pred``).
    test_data : array-like
        Scaled values; their inverse is returned second (``y_test``).
    standard : bool
        ``True`` selects ``StandardScaler``; any other value selects
        ``MinMaxScaler``.

    Returns
    -------
    tuple
        ``(y_pred, y_test)`` in the original units.
    """
    reference = train[time_dependent_variable].values.reshape(-1, 1)

    if standard == True:
        scaler = StandardScaler()
    else:
        scaler = MinMaxScaler()
    # Only the fitted state is needed; the transformed training data is unused.
    scaler.fit(reference)

    y_pred = scaler.inverse_transform(prediction_data)
    y_test = scaler.inverse_transform(test_data)
    return y_pred, y_test


In [None]:
def history_plot(training_history=None):
  """Plot training loss against validation loss per epoch.

  Parameters
  ----------
  training_history : object, optional
      Object exposing a ``history`` dict with ``'loss'`` and ``'val_loss'``
      entries. When omitted, the notebook-level ``history`` variable is used
      (the original behaviour).
  """
  if training_history is None:
    training_history = history

  # The size has to be fixed before anything is drawn; changing rcParams after
  # plotting would only affect figures created later.
  plt.figure(figsize=(10, 6))
  plt.plot(training_history.history['loss'])
  plt.plot(training_history.history['val_loss'])
  plt.title('Model loss')
  plt.ylabel('loss')
  plt.xlabel('epoch')
  plt.legend(['train loss', 'validation loss'], loc='upper left')
  plt.show()

In [None]:
def plot_test(prediction, model_name, y_test):
    """Draw the test values and the predictions on one 10x6 figure.

    Parameters
    ----------
    prediction : array-like
        Predicted values; its length sets the x-axis range.
    model_name : str
        Appended to the figure title.
    y_test : array-like
        Values plotted under the 'Test data' label.
    """
    steps = np.arange(len(prediction))

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(steps, np.array(y_test), label= 'Test data')
    ax.plot(steps, np.array(prediction), label='Prediction')
    ax.set_title('Test data vs prediction for ' + model_name)
    ax.legend(loc="upper left")
    ax.set_xlabel("Date-Time")
    ax.set_ylabel('{}'.format(time_dependent_variable))

# MODELS

## 1) Vanilla LSTM

In [None]:
# A single LSTM layer with 50 units and relu activation, followed by a one-unit output layer.
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape = (n_steps,1)))
model.add(Dense(1))
# Train on MSE like the other models: the targets are standardized (centred on
# zero), where a percentage-error loss divides by values close to zero.
model.compile(optimizer='adam', loss='mse')

In [None]:
# Halt training once validation loss has gone 10 epochs without improving.
callback = EarlyStopping(monitor='val_loss', mode='min', patience=10, min_delta=0, verbose=1)
history = model.fit(
    X_train,
    Y_train,
    epochs=200,
    validation_split=0.2,
    callbacks=[callback],
    verbose=0,
)
# Show the train/validation loss curves for this run.
history_plot()

In [None]:
y_predict = model.predict(X_test, verbose=0)
# Unscale(prediction_data, test_data, standard) returns (y_pred, y_test) in that
# order, so the model output goes first and the true targets second.
y_pred, y_test = Unscale(y_predict, Y_test, True)
timeseries_evaluation_metrics_func(y_test, y_pred)

In [None]:
# Compare predictions with the held-out values for this model.
plot_test(prediction=y_pred, model_name='Vanilla-LSTM', y_test=y_test)

## 2) Stacked LSTM

In [None]:
# Two stacked LSTM layers; the first returns the full sequence so the second can consume it.
model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(n_steps, 1)),
    LSTM(50, activation='relu'),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

In [None]:
# Halt training once validation loss has gone 10 epochs without improving.
callback = EarlyStopping(monitor='val_loss', mode='min', patience=10, min_delta=0, verbose=1)
history = model.fit(
    X_train,
    Y_train,
    epochs=200,
    validation_split=0.2,
    callbacks=[callback],
    verbose=0,
)
# Show the train/validation loss curves for this run.
history_plot()

In [None]:
y_predict = model.predict(X_test, verbose=0)
# Unscale(prediction_data, test_data, standard) returns (y_pred, y_test) in that
# order, so the model output goes first and the true targets second.
y_pred, y_test = Unscale(y_predict, Y_test, True)
timeseries_evaluation_metrics_func(y_test, y_pred)

In [None]:
# Compare predictions with the held-out values for this model.
plot_test(prediction=y_pred, model_name='Stacked-LSTM', y_test=y_test)

## 3) Bidirectional LSTM

In [None]:
# One LSTM layer wrapped in Bidirectional, followed by a one-unit output layer.
model = Sequential([
    Bidirectional(LSTM(50, activation='relu'), input_shape=(n_steps, 1)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

In [None]:
# Halt training once validation loss has gone 10 epochs without improving.
callback = EarlyStopping(monitor='val_loss', mode='min', patience=10, min_delta=0, verbose=1)
history = model.fit(
    X_train,
    Y_train,
    epochs=200,
    validation_split=0.2,
    callbacks=[callback],
    verbose=0,
)
# Show the train/validation loss curves for this run.
history_plot()

In [None]:
y_predict = model.predict(X_test, verbose=0)
# Unscale(prediction_data, test_data, standard) returns (y_pred, y_test) in that
# order, so the model output goes first and the true targets second.
y_pred, y_test = Unscale(y_predict, Y_test, True)
timeseries_evaluation_metrics_func(y_test, y_pred)

In [None]:
# Compare predictions with the held-out values for this model.
plot_test(prediction=y_pred, model_name='Bidirectional-LSTM', y_test=y_test)

## 4) GRU

In [None]:
    # Two stacked GRU layers (50 units each) followed by a one-unit output layer.
    # NOTE(review): this cell's code is indented one level although it is not
    # inside any block — confirm the notebook front-end accepts that, or dedent.
    model = Sequential([
        GRU(50, return_sequences=True, input_shape=(n_steps, 1)),
        GRU(units=50),
        Dense(units=1),
    ])
    model.compile(optimizer="adam", loss="mse")

In [None]:
# Halt training once validation loss has gone 10 epochs without improving.
callback = EarlyStopping(monitor='val_loss', mode='min', patience=10, min_delta=0, verbose=1)
history = model.fit(
    X_train,
    Y_train,
    epochs=200,
    validation_split=0.2,
    callbacks=[callback],
    verbose=0,
)

# Show the train/validation loss curves for this run.
history_plot()

In [None]:
y_predict = model.predict(X_test, verbose=0)
# Unscale(prediction_data, test_data, standard) returns (y_pred, y_test) in that
# order, so the model output goes first and the true targets second.
y_pred, y_test = Unscale(y_predict, Y_test, True)
timeseries_evaluation_metrics_func(y_test, y_pred)

In [None]:
# Compare predictions with the held-out values for this model.
plot_test(prediction=y_pred, model_name='GRU', y_test=y_test)

## 5) MLP

In [None]:
# One hidden dense layer (100 units, relu) taking the n_steps lags as a flat input vector.
model = Sequential([
    Dense(100, activation='relu', input_dim=n_steps),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

In [None]:
# Halt training once validation loss has gone 10 epochs without improving.
callback = EarlyStopping(monitor='val_loss', mode='min', patience=10, min_delta=0, verbose=1)
history = model.fit(
    X_train,
    Y_train,
    epochs=200,
    validation_split=0.2,
    callbacks=[callback],
    verbose=0,
)

# Show the train/validation loss curves for this run.
history_plot()

In [None]:
y_predict = model.predict(X_test, verbose=0)
# Unscale(prediction_data, test_data, standard) returns (y_pred, y_test) in that
# order, so the model output goes first and the true targets second.
y_pred, y_test = Unscale(y_predict, Y_test, True)
timeseries_evaluation_metrics_func(y_test, y_pred)

In [None]:
# Compare predictions with the held-out values for this model.
plot_test(prediction=y_pred, model_name='MLP', y_test=y_test)

## 6) CNN

In [None]:
# 1-D convolution over the lag window, max-pooled and flattened into a small dense head.
model = Sequential([
    Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps, 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

In [None]:
# Halt training once validation loss has gone 10 epochs without improving.
callback = EarlyStopping(monitor='val_loss', mode='min', patience=10, min_delta=0, verbose=1)
history = model.fit(
    X_train,
    Y_train,
    epochs=200,
    validation_split=0.2,
    callbacks=[callback],
    verbose=0,
)
# Show the train/validation loss curves for this run.
history_plot()

In [None]:
y_predict = model.predict(X_test, verbose=0)
# Unscale(prediction_data, test_data, standard) returns (y_pred, y_test) in that
# order, so the model output goes first and the true targets second.
y_pred, y_test = Unscale(y_predict, Y_test, True)
timeseries_evaluation_metrics_func(y_test, y_pred)

In [None]:
# This cell belongs to the CNN section, so the figure title must name the CNN model.
plot_test(y_pred, 'CNN', y_test)