In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.holtwinters import ExponentialSmoothing

import itertools

from sklearn.metrics import mean_squared_error

# Notebook plotting setup: draw matplotlib figures inline in the cell output
# and request SVG as the inline figure format.
%matplotlib inline                 
%config InlineBackend.figure_format = 'svg'

# Select the "seaborn-v0_8-dark" matplotlib style sheet for subsequent plots.
plt.style.use("seaborn-v0_8-dark")  

In [2]:
# The CSV is expected in the working directory. To download it, uncomment:
# !wget -nc https://lazyprogrammer.me/course_files/airline_passengers.csv

# Column 0 of the file becomes the index and is parsed as dates.
df = pd.read_csv(
    'airline_passengers.csv',
    parse_dates=True,
    index_col=0,
)

In [3]:
# Declare the index frequency explicitly as 'MS' (month start) so that
# downstream time-series code sees a regular, labelled frequency.
df.index.freq = 'MS'

In [4]:
# Sanity check on the loaded frame: (number of rows, number of columns)
df.shape

(144, 1)

In [5]:
# Forecast horizon: each walk-forward fold is scored on the h observations
# that immediately follow its training window.
h = 12

# Number of walk-forward folds used to validate each model.
steps = 10

# Length of the first training window, i.e. the first train/test split index.
# It is chosen so that the last of the `steps` folds has a test window of
# length h ending on the final row of df.
Ntest = len(df) - h - steps + 1

In [6]:
# Hyperparameter grid: one list of candidate values per model option.

# Trend component: additive or multiplicative
trend_type_list = ['add', 'mul']

# Seasonal component: additive or multiplicative
season_type_list = ['add', 'mul']

# Whether the trend is damped
damped_trend_list = [True, False]

# Method used to initialise the model state
init_method_list = ['estimated', 'heuristic', 'legacy-heuristic']

# Box-Cox option handed to the model.
# NOTE(review): the last entry is the integer 0, presumably meant as a fixed
# Box-Cox lambda of 0 (log transform) -- confirm that statsmodels treats an
# int 0 that way (as opposed to 0.0 or 'log').
use_boxcox_list = [True, False, 0]

In [7]:
def walk_forward(trend_type, season_type, damped_trend, init_method, use_boxcox, debug=False):
    """Walk-forward validation of one Holt-Winters configuration.

    Reads the notebook globals ``df`` (must have a 'Passengers' column),
    ``h`` (forecast horizon) and ``Ntest`` (first train/test split index).
    For every split index from ``Ntest`` through ``len(df) - h`` the model is
    fitted on all rows before the split and scored on the ``h`` rows after it.

    Parameters
    ----------
    trend_type : passed to ExponentialSmoothing as ``trend``.
    season_type : passed to ExponentialSmoothing as ``seasonal``.
    damped_trend : passed to ExponentialSmoothing as ``damped_trend``.
    init_method : passed to ExponentialSmoothing as ``initialization_method``.
    use_boxcox : passed to ExponentialSmoothing as ``use_boxcox``.
    debug : when True, print whether the last row of ``df`` was covered by a
        test window and how many folds were evaluated.

    Returns
    -------
    The mean (``np.mean``) of the per-fold mean squared errors.
    """
    fold_mses = []

    # Debug bookkeeping: did any test window end on the very last row of df?
    reached_end = False

    for split in range(Ntest, len(df) - h + 1):

        history = df.iloc[:split]
        holdout = df.iloc[split:split + h]

        if holdout.index[-1] == df.index[-1]:
            reached_end = True

        model = ExponentialSmoothing(
            history['Passengers'],
            initialization_method=init_method,
            trend=trend_type,
            damped_trend=damped_trend,
            seasonal=season_type,
            seasonal_periods=12,
            use_boxcox=use_boxcox,
        )
        fitted = model.fit()

        # Score the h-step-ahead forecast against the held-out window.
        prediction = fitted.forecast(h)
        fold_mses.append(mean_squared_error(holdout['Passengers'], prediction))

    if debug:
        # One error is stored per completed fold, so the list length is the fold count.
        print('seen_last:', reached_end)
        print('steps_completed:', len(fold_mses))

    return np.mean(fold_mses)


In [8]:
# Smoke test: run a single configuration with debug=True so the fold
# bookkeeping (last row covered, number of folds) is printed.
walk_forward(
    trend_type='add',
    season_type='add',
    damped_trend=False,
    init_method='legacy-heuristic',
    use_boxcox=0,
    debug=True,
)

seen_last: True
steps_completed: 10


1448.5344452151644

In [9]:
# Bundle the option lists in the same order as walk_forward's positional
# parameters, so each element of the Cartesian product can be splatted
# straight into it.
tuple_of_option_lists = (
    trend_type_list,
    season_type_list,
    damped_trend_list,
    init_method_list,
    use_boxcox_list,
)

# Show every hyperparameter combination that the grid search will visit.
for x in itertools.product(*tuple_of_option_lists):
    print(x)

('add', 'add', True, 'estimated', True)
('add', 'add', True, 'estimated', False)
('add', 'add', True, 'estimated', 0)
('add', 'add', True, 'heuristic', True)
('add', 'add', True, 'heuristic', False)
('add', 'add', True, 'heuristic', 0)
('add', 'add', True, 'legacy-heuristic', True)
('add', 'add', True, 'legacy-heuristic', False)
('add', 'add', True, 'legacy-heuristic', 0)
('add', 'add', False, 'estimated', True)
('add', 'add', False, 'estimated', False)
('add', 'add', False, 'estimated', 0)
('add', 'add', False, 'heuristic', True)
('add', 'add', False, 'heuristic', False)
('add', 'add', False, 'heuristic', 0)
('add', 'add', False, 'legacy-heuristic', True)
('add', 'add', False, 'legacy-heuristic', False)
('add', 'add', False, 'legacy-heuristic', 0)
('add', 'mul', True, 'estimated', True)
('add', 'mul', True, 'estimated', False)
('add', 'mul', True, 'estimated', 0)
('add', 'mul', True, 'heuristic', True)
('add', 'mul', True, 'heuristic', False)
('add', 'mul', True, 'heuristic', 0)
('add

In [10]:
# Exhaustive grid search: score every hyperparameter combination with
# walk_forward and remember the one with the lowest mean MSE.
best_score, best_option = float('inf'), None

for x in itertools.product(*tuple_of_option_lists):

    score = walk_forward(*x)

    # Not a strict improvement (a NaN score also lands here): keep the incumbent.
    if not score < best_score:
        continue

    print('Best score so far:', score)
    best_score, best_option = score, x






Best score so far: 412.8172272556244
Best score so far: 397.58730127020334
Best score so far: 368.7874966634643
Best score so far: 320.6640905637339
Best score so far: 308.13601902743665


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


Best score so far: 305.6593349312611
Best score so far: 299.8215512496484
Best score so far: 261.8795073548492
Best score so far: 249.57507607273482


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


In [12]:
# Lowest mean walk-forward MSE found across all hyperparameter combinations
print('Best score:', best_score)

Best score: 249.57507607273482


In [14]:
# Unpack the winning combination; the order matches tuple_of_option_lists.
best_trend, best_season, best_damped_trend, best_init, best_boxcox = best_option

# Report each selected hyperparameter next to its label.
for _label, _value in (
    ("optimal trend type:", best_trend),
    ("optimal seasonality type:", best_season),
    ("optimal damped trend type:", best_damped_trend),
    ("optimal initilization method:", best_init),
    ("optimal box-cox option:", best_boxcox),
):
    print(_label, _value)



optimal trend type: mul
optimal seasonality type: add
optimal damped trend type: False
optimal initilization method: legacy-heuristic
optimal box-cox option: False
