# ETS (ERROR, TREND AND SEASONALITY) MODELS

In [None]:
# libraries
import pandas as pd 
import math
import numpy as np 
import matplotlib.pyplot as plt
import datetime
from sklearn import metrics
from statsmodels.tsa.api import ExponentialSmoothing,SimpleExpSmoothing, Holt
from statsmodels.tsa.forecasting.theta import ThetaModel

In [None]:
# Notebook configuration for univariate time-series analysis.
#
# Requirements:
#   1) The time column and the time-dependent variable must not contain
#      missing values or categorical (string) data.
#   2) Fill in the five settings below: the input file (CSV only), the
#      time-dependent variable, the time column, the date-time format (frmt)
#      and the resample grain (X). Then run every cell (Cell ---> Run All).
#   3) Example:
#        file_name               = "JetRail Avg Hourly Traffic Data - 2012-2013.csv"
#        time_dependent_variable = "Count"      (column name in your dataset)
#        time_column             = "Datetime"   (column name in your dataset)
#        frmt                    = "%Y-%m-%d"
#        X                       = "D"

# Date-time format of the time column and the grain the data is resampled to.
frmt = "%Y-%m-%d"
X = "D"

# Input CSV file and the two columns that are read from it.
file_name = "cta_ridership.csv"
time_column = "service_date"
time_dependent_variable = "total_rides"

### Reading the csv file

In [None]:
def data(time_column, file_name, frmt='%Y-%m-%d %H:%M:%S', X="D", value_column=None):
    """Load one value column from a CSV file and resample it on a regular time grid.

    Parameters
    ----------
    time_column : str
        Name of the CSV column holding the timestamps.
    file_name : str
        Path of the CSV file to read.
    frmt : str, optional
        ``strftime`` format used to parse ``time_column``.
    X : str, optional
        Resample grain passed to ``DataFrame.resample`` (e.g. ``"D"``).
    value_column : str, optional
        Name of the value column to keep. When omitted, the module-level
        ``time_dependent_variable`` setting is used, which is what this
        function always read before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``time_column`` followed by the value column, with one
        row per resample period holding the mean of the values in that period.
    """
    if value_column is None:
        # Backward-compatible fallback to the notebook-level setting.
        value_column = time_dependent_variable

    # Read only the two columns that are needed; this also avoids assigning
    # into a slice of a larger frame.
    frame = pd.read_csv(file_name, usecols=[time_column, value_column])
    frame[time_column] = pd.to_datetime(frame[time_column], format=frmt)

    # set_index moves the time column into the index (instead of duplicating
    # it), so reset_index can restore it without a name clash.
    resampled = frame.set_index(time_column).resample(X).mean()
    return resampled.reset_index()
# Load the configured CSV and resample it; keyword arguments make the
# mapping between the notebook settings and the parameters explicit.
df = data(time_column=time_column, file_name=file_name, frmt=frmt, X=X)

In [None]:
# Preview the first five rows of the resampled data.
df.head(n=5)

### Splitting the data into train and test sets (use either one of the two methods below)

In [None]:
# Splits the data chronologically into train and test by fraction
# (default split = 0.7, i.e. the first 70% of the rows become the training set).
def train_test_split_perc(df, split=0.7):
    """Split ``df`` into a leading training part and a trailing test part.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to split; row order is preserved.
    split : float, optional
        Fraction of rows (between 0 and 1) assigned to the training set.
        The training size is ``floor(split * len(df))``.

    Returns
    -------
    tuple
        ``(test, train)`` -- NOTE the order: the test part comes first.

    Raises
    ------
    ValueError
        If ``split`` is not between 0 and 1. Without this check an
        out-of-range fraction produced overlapping train and test sets.
    """
    if not 0 <= split <= 1:
        raise ValueError(f"split must be between 0 and 1, got {split!r}")

    train_size = math.floor(split * len(df))
    train = df.iloc[:train_size]
    test = df.iloc[train_size:]
    return test, train
    
# Hold out the last 20% of the rows for testing.
# train_test_split_perc returns the pair in (test, train) order.
test, train = train_test_split_perc(df, 0.8)

In [None]:
# Splits the data into train and test at a given date
def train_test_split_date(df, split_date, time_column=None):
    """Split ``df`` into rows up to and including ``split_date`` and rows after it.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to split.
    split_date : str or datetime-like
        Cut-off date; rows dated on or before it go to the training set.
        (Previously this argument was overwritten by a hard-coded date.)
    time_column : str, optional
        Column holding the timestamps. When omitted, the index of ``df`` is
        compared against ``split_date``, so the index must then hold dates.

    Returns
    -------
    tuple
        ``(train, test)`` as independent copies.
    """
    stamps = df.index if time_column is None else df[time_column]
    train = df.loc[stamps <= split_date].copy()
    test = df.loc[stamps > split_date].copy()
    return train, test

# Alternative to the percentage split (note the return order: train, test):
#train, test = train_test_split_date(df, '2017-01-01', time_column=time_column)

### Evaluating a model using different metrics

In [None]:
from sklearn import metrics

def timeseries_evaluation_metrics_func(y_true, y_pred):
    """Print MSE, MAE, RMSE, MAPE and R2 for a forecast.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Forecast values, in the same order as ``y_true``.

    Returns
    -------
    None
        The metrics are only printed; nothing is returned.

    Notes
    -----
    MAPE divides by ``y_true``, so it is not finite when ``y_true``
    contains zeros.
    """
    print('Evaluation metric results:-')

    # MSE is needed twice (on its own and under the square root for RMSE).
    mse = metrics.mean_squared_error(y_true, y_pred)
    print(f'MSE is : {mse}')
    print(f'MAE is : {metrics.mean_absolute_error(y_true, y_pred)}')
    print(f'RMSE is : {np.sqrt(mse)}')

    # Mean absolute percentage error, computed on plain arrays so that any
    # pandas index on the inputs plays no role.
    actual, forecast = np.array(y_true), np.array(y_pred)
    abs_pct_err = np.abs((actual - forecast) / actual)
    print(f'MAPE is : {np.mean(abs_pct_err) * 100}')

    print(f'R2 is : {metrics.r2_score(y_true, y_pred)}',end='\n\n')

# MODELS

### 1) Single Exponential Smoothing Model

In [None]:
# Fitting and forecasting a simple exponential model
# (fitted on the raw values, so the forecast comes back as a plain array)
fitSESauto = SimpleExpSmoothing(np.asarray(train[time_dependent_variable])).fit(optimized=True)
pred = fitSESauto.forecast(len(test))

# Different metrics for evaluating the model.
# timeseries_evaluation_metrics_func prints its results and returns None,
# so it is called directly; wrapping it in print() emitted a stray "None".
timeseries_evaluation_metrics_func(test[time_dependent_variable], pred)
print(fitSESauto.summary())

# Creating a column for forecasts for plotting.
# The forecast is attached by position and labelled with the test index,
# so it lines up with the test rows whatever index they carry.
y_hat = test.copy()
pred = pd.Series(np.asarray(pred), index=test.index)
y_hat["pred"] = pred

# Plotting a graph showing results
plt.figure(figsize=(12, 8))
plt.plot(train[time_column], train[time_dependent_variable], label='Train')
plt.plot(test[time_column], test[time_dependent_variable], label='Test')
plt.plot(y_hat[time_column], y_hat['pred'], label='Simple Exponential Smoothing using optimized =True')
plt.legend(loc='best')
plt.title("Single Exponential Smoothing Forecast")
plt.show()

### 2) Double Exponential Smoothing Model

In [None]:
# Fitting and forecasting a double exponential (Holt) model
fitESauto = Holt(train[time_dependent_variable]).fit(optimized=True, use_brute=True)
pred = fitESauto.forecast(len(test))

# Different metrics for evaluating the model.
# timeseries_evaluation_metrics_func prints its results and returns None,
# so it is called directly; wrapping it in print() emitted a stray "None".
timeseries_evaluation_metrics_func(test[time_dependent_variable], pred)
print(fitESauto.summary())

# Creating a column for forecasts for plotting.
# The forecast values are attached by position and labelled with the test
# index, so the result does not depend on the index the model put on them.
y_hat = test.copy()
pred = pd.Series(np.asarray(pred), index=test.index)
y_hat["pred"] = pred

# Plotting a graph showing results
plt.figure(figsize=(12, 8))
plt.plot(train[time_column], train[time_dependent_variable], label='Train')
plt.plot(test[time_column], test[time_dependent_variable], label='Test')
plt.plot(y_hat[time_column], y_hat['pred'], label='Double Exponential')
plt.legend(loc='best')
plt.title("Double Exponential Smoothing Model")
plt.show()

### 3) Triple Exponential Smoothing Model

In [None]:
# Fitting and forecasting a triple exponential model. Trend and seasonality can be additive or multiplicative
# NOTE(review): a season length of 12 looks like a monthly setting; with a
# daily resample grain a different period (e.g. 7) may be intended -- confirm.
seasonal_periods = 12
fitESauto = ExponentialSmoothing(
    train[time_dependent_variable],
    trend='mul',
    seasonal='mul',
    seasonal_periods=seasonal_periods,
).fit()
pred = fitESauto.forecast(len(test))

# Different metrics for evaluating the model.
# timeseries_evaluation_metrics_func prints its results and returns None,
# so it is called directly; wrapping it in print() emitted a stray "None".
timeseries_evaluation_metrics_func(test[time_dependent_variable], pred)
print(fitESauto.summary())

# Creating a column for forecasts for plotting.
# The forecast values are attached by position and labelled with the test
# index, so the result does not depend on the index the model put on them.
y_hat = test.copy()
pred = pd.Series(np.asarray(pred), index=test.index)
y_hat["pred"] = pred

# Plotting a graph showing results
plt.figure(figsize=(12, 8))
plt.plot(train[time_column], train[time_dependent_variable], label='Train')
plt.plot(test[time_column], test[time_dependent_variable], label='Test')
plt.plot(y_hat[time_column], y_hat['pred'], label='Triple Exponential')
plt.legend(loc='best')
plt.title("Triple Exponential Smoothing Model")
plt.show()

### 4) Theta Model

This model is well known for its simplicity and for its strong performance in the M4 forecasting competition.
The model is implemented in the following steps:

- Test for seasonality
- Deseasonalize if seasonality detected
- Estimate alpha (SES) by fitting a SES model to the data and beta by OLS.
- Forecast the series
- Reseasonalize if the data was deseasonalized.

This model uses SES and simple OLS. For more information, see https://www.statsmodels.org/stable/examples/notebooks/generated/theta-model.html

In [None]:
# The Theta model is fitted on a series indexed by time_column.
# A separate date-indexed series is built for this, so `train` itself is
# left untouched and the earlier cells can be re-run unchanged.
train_series = train.set_index(time_column)[time_dependent_variable]

# Fitting the model
fitESauto = ThetaModel(train_series).fit()
pred = fitESauto.forecast(len(test), theta=np.inf)

# Different metrics for evaluating the model.
# timeseries_evaluation_metrics_func prints its results and returns None,
# so it is called directly; wrapping it in print() emitted a stray "None".
timeseries_evaluation_metrics_func(test[time_dependent_variable], pred)
print(fitESauto.summary())

# Creating a column for forecasts for plotting.
# The forecast is indexed by date here, so rebuilding it with
# pd.Series(pred, index=<integer positions>) would reindex it and turn every
# value into NaN (an empty forecast line). The values are therefore attached
# by position and labelled with the test index.
y_hat = test.copy()
pred = pd.Series(np.asarray(pred), index=test.index)
y_hat["pred"] = pred

# Plotting a graph showing results
plt.figure(figsize=(12, 8))
plt.plot(train[time_column], train[time_dependent_variable], label='Train')
plt.plot(test[time_column], test[time_dependent_variable], label='Test')
plt.plot(y_hat[time_column], y_hat['pred'], label='Theta Model')
plt.legend(loc='best')
plt.title("Theta Model")
plt.show()