In [None]:
# Source libraries
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
import warnings

import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Notebook-wide settings: silence every warning and pick the plotting defaults.
warnings.simplefilter('ignore')
plt.style.use('fivethirtyeight')
# Default figure size; kept after the style call, preserving the original statement order.
plt.rcParams.update({'figure.figsize': (15, 7)})

# Importing Data
- Dataset: monthly Australian beer production, 1956 - 1995

In [None]:
# Load raw data: the 'Month' column is parsed as dates and used as the index
beer_csv_path = "/kaggle/input/time-series-datasets/monthly-beer-production-in-austr.csv"
df_raw = pd.read_csv(beer_csv_path, index_col='Month', parse_dates=['Month'])
# Give the value column a short, fixed name (the assignment requires exactly one column)
df_raw.columns = ['Production']

In [None]:
# Basic prediction settings
seasonal_period = 12           # length of one seasonal cycle (passed as seasonal_periods to the model)
tuning_test_period_years = 3   # number of years held out for parameter tuning
tuning_test_period_months = tuning_test_period_years * 12  # the same hold-out window, in months

# Year boundaries used by the data selection / train-test split cell
use_data_till_year = 1969
train_data_till_year = use_data_till_year - tuning_test_period_years

In [None]:
# Year boundaries become strings so they can be compared against the date index
# (assumes df_raw has a DatetimeIndex, as requested via parse_dates when loading).
use_data_till_year = str(use_data_till_year)
train_data_till_year = str(train_data_till_year)

# Keep only rows dated strictly before the cut-off year
# (the `<` comparison leaves the cut-off year itself out).
before_cutoff = df_raw.index < use_data_till_year
df_till_1969 = df_raw.loc[before_cutoff]

# Train-test split: everything before the training boundary is train, the rest is test
in_train_window = df_till_1969.index < train_data_till_year
in_test_window = df_till_1969.index >= train_data_till_year
df_train = df_till_1969[in_train_window]
df_test = df_till_1969[in_test_window]

In [None]:
# Train-test split plot: both parts on one axis, labelled so they can be told apart
fig, ax = plt.subplots()
ax.plot(df_train, label='train')
ax.plot(df_test, label='test')
ax.set_title('Beer Production - train / test split')
ax.set_ylabel('megalitres')
ax.legend()
plt.show()

In [None]:
# Grid search parameter tuning - 1st iteration - broad search
# Every combination below is fitted on df_train and scored by the RMSE of its
# forecast against df_test.

smoothing_levels = [0.3, 0.5, 0.7, 0.9]
smoothing_trends = [0.01, 0.1, 0.3]
smoothing_seasonal = [0.01, 0.1, 0.3, 0.5, 0.7, 0.9]

component_type = ['add', 'mul']
dampening = [0.95, 0.98, 0.99, 1]

# Results are gathered as plain dicts and converted to a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call.
tuning_records = []

for level in smoothing_levels:
    for trend in smoothing_trends:
        for seasonal in smoothing_seasonal:
            for component in component_type:
                for damping in dampening:

                    # The same component type is applied to both trend and seasonality
                    tmp_model = (ExponentialSmoothing(df_train,
                                                      trend=component,
                                                      seasonal=component,
                                                      seasonal_periods=seasonal_period,
                                                      damped_trend=True)
                                 .fit(smoothing_level=level,
                                      smoothing_trend=trend,
                                      smoothing_seasonal=seasonal,
                                      damping_trend=damping))

                    # Forecast exactly the hold-out window so it lines up with df_test
                    tmp_forecast = tmp_model.forecast(tuning_test_period_months)

                    tuning_records.append({'level': level,
                                           'trend': trend,
                                           'seasonal': seasonal,
                                           'component': component,
                                           'dampening': damping,
                                           'RMSE': np.sqrt(mean_squared_error(df_test, tmp_forecast))})

# Explicit column list keeps the column order (and an empty frame's schema) stable
df_tuning = pd.DataFrame(tuning_records,
                         columns=['level', 'trend', 'seasonal', 'component', 'dampening', 'RMSE'])

In [None]:
# Twenty parameter combinations with the lowest RMSE, best first
df_tuning.sort_values('RMSE', ascending=True).head(20)

In [None]:
# Grid search parameter tuning - 2nd iteration - finer view
# Multiplicative components only, with denser grids for the trend, seasonal and
# damping parameters. Each combination is fitted on df_train and scored by the
# RMSE of its forecast against df_test.

smoothing_levels = [0.3, 0.5, 0.7, 0.9]
smoothing_trends = [0.01, 0.05, 0.1, 0.2, 0.3]
smoothing_seasonal = [0.01, 0.03, 0.05, 0.07, 0.1, 0.2, 0.3, 0.4]
component_type = ['mul']
dampening = [0.95, 0.96, 0.97, 0.98, 0.99, 1]

# Results are gathered as plain dicts and converted to a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call.
tuning_records2 = []

for level in smoothing_levels:
    for trend in smoothing_trends:
        for seasonal in smoothing_seasonal:
            for component in component_type:
                for damping in dampening:

                    # The same component type is applied to both trend and seasonality
                    tmp_model = (ExponentialSmoothing(df_train,
                                                      trend=component,
                                                      seasonal=component,
                                                      seasonal_periods=seasonal_period,
                                                      damped_trend=True)
                                 .fit(smoothing_level=level,
                                      smoothing_trend=trend,
                                      smoothing_seasonal=seasonal,
                                      damping_trend=damping))

                    # Forecast exactly the hold-out window so it lines up with df_test
                    tmp_forecast = tmp_model.forecast(tuning_test_period_months)

                    tuning_records2.append({'level': level,
                                            'trend': trend,
                                            'seasonal': seasonal,
                                            'component': component,
                                            'dampening': damping,
                                            'RMSE': np.sqrt(mean_squared_error(df_test, tmp_forecast))})

# Explicit column list keeps the column order (and an empty frame's schema) stable
df_tuning2 = pd.DataFrame(tuning_records2,
                          columns=['level', 'trend', 'seasonal', 'component', 'dampening', 'RMSE'])

In [None]:
# Twenty parameter combinations of the finer search with the lowest RMSE, best first
df_tuning2.sort_values('RMSE', ascending=True).head(20)

In [None]:
prediction_horizon = 5 * 12  # Predict 5 years

# Model specification: damped multiplicative trend and multiplicative seasonality,
# built on all data selected for modelling (train and tuning-test parts together).
final_spec = ExponentialSmoothing(
    df_till_1969,
    trend='mul',
    seasonal='mul',
    seasonal_periods=seasonal_period,
    damped_trend=True,
)

# Smoothing parameters are fixed by hand here
# (presumably picked from the grid-search tables - TODO confirm).
final_model = final_spec.fit(
    smoothing_level=0.9,
    smoothing_trend=0.05,
    smoothing_seasonal=0.1,
    damping_trend=0.99,
)



In [None]:
# Final prediction for 5 years: reuse the horizon (in months) defined with the
# final model instead of repeating the literal
forecast_1 = final_model.forecast(prediction_horizon)

# Final prediction for 20 years (horizon given in months)
forecast_2 = final_model.forecast(20 * 12)

In [None]:
# Plot 5 years forecast (forecast values only), labelled like the other figures
ax = forecast_1.plot()
ax.set_title('5 year forecast')
ax.set_ylabel('megalitres')
plt.show()

In [None]:
# Plot 20 years forecast (forecast values only), labelled like the other figures
ax = forecast_2.plot()
ax.set_title('20 year forecast')
ax.set_ylabel('megalitres')
plt.show()

## Fast-forward into the "future"

In [None]:
# Full raw time series with the 5-year forecast drawn on the same axes
ax = df_raw.plot(figsize=(15, 7))
forecast_1.plot(ax=ax)
ax.set_title('Beer Production - 5 year forecast')
ax.set_ylabel('megalitres')
plt.show()

In [None]:
# Full raw time series with the 20-year forecast drawn on the same axes
ax = df_raw.plot(figsize=(15, 7))
forecast_2.plot(ax=ax)
ax.set_title('Beer Production - 20 year forecast')
ax.set_ylabel('megalitres')
plt.show()

In [None]:
# Turn each forecast into a one-column frame named 'prediction'.
# pd.DataFrame(...) accepts a Series as well as a DataFrame, so re-running this
# cell does not fail the way Series.to_frame() would on an already converted frame.
forecast_1 = pd.DataFrame(forecast_1)
forecast_1.columns = ['prediction']

forecast_2 = pd.DataFrame(forecast_2)
forecast_2.columns = ['prediction']

In [None]:
# Attach the actual values from df_raw to each forecast, matching on the index;
# the left join keeps exactly the forecast rows.
forecast_1_w_actuals = forecast_1.join(df_raw, how='left')
forecast_2_w_actuals = forecast_2.join(df_raw, how='left')

In [None]:
# Show the 20-year forecast joined with the actual production values
forecast_2_w_actuals

In [None]:
# Forecast 1 (5 years) - final accuracy - RMSE against the actual values
np.sqrt(mean_squared_error(y_true=forecast_1_w_actuals['Production'],
                           y_pred=forecast_1_w_actuals['prediction']))

In [None]:
# Forecast 2 (20 years) - final accuracy - RMSE against the actual values
np.sqrt(mean_squared_error(y_true=forecast_2_w_actuals['Production'],
                           y_pred=forecast_2_w_actuals['prediction']))

In [None]:
# MAPE computation, adapted from: https://stackoverflow.com/questions/47648133/mape-calculation-in-python
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Both inputs are converted to NumPy arrays; the absolute error of every
    element is divided by the corresponding true value, averaged, and
    multiplied by 100.

    Note: this definition replaces the function of the same name imported from
    sklearn.metrics at the top of the notebook (sklearn's version reportedly
    returns a fraction rather than a percentage - verify against its docs).
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_errors = np.abs((actual - predicted) / actual)
    return relative_errors.mean() * 100

In [None]:
# MAPE of the 5-year forecast against the actual production values
mean_absolute_percentage_error(y_true=forecast_1_w_actuals['Production'],
                               y_pred=forecast_1_w_actuals['prediction'])

In [None]:
# MAPE of the 20-year forecast against the actual production values
mean_absolute_percentage_error(y_true=forecast_2_w_actuals['Production'],
                               y_pred=forecast_2_w_actuals['prediction'])

### Did anyone get a better RMSE than

 8.21  for forecast 1 or
28.53  for forecast 2 ?
