In [3]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import itertools

from sklearn.metrics import mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [4]:
# Read the passenger series, using the "Month" column as a parsed date index.
df = pd.read_csv(
    "airline_passengers.csv",
    index_col="Month",
    parse_dates=True,
)

# Tag the index as month-start ("MS") frequency.
df.index.freq = "MS"

df.head()

Unnamed: 0_level_0,Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121


In [5]:
# (rows, columns) of the loaded frame
df.shape

(144, 1)

In [7]:
# h     : forecast horizon we care about (periods predicted per fold)
# steps : number of walk-forward folds to validate over
h, steps = 12, 10

# Despite its name, Ntest is the first training cutoff used by walkforward:
# sliding the cutoff one row at a time from here up to len(df) - h gives
# exactly `steps` test windows of length `h`.
Ntest = len(df) - (h + steps) + 1

print(f"h is {h}, steps is {steps}, Ntest is {Ntest}")

h is 12, steps is 10, Ntest is 123


In [9]:
# Candidate values for each Holt-Winters hyperparameter explored by the grid search.
trend_type_list = [
    "add",
    "mul",
]
seasonal_type_list = [
    "add",
    "mul",
]
damped_trend_list = [
    True,
    False,
]
init_method_list = [
    "estimated",
    "heuristic",
    "legacy-heuristic",
]
# NOTE(review): the integer 0 is presumably taken as a fixed Box-Cox lambda
# (i.e. a log transform) rather than as "off" -- confirm against statsmodels.
use_boxcox_list = [
    True,
    False,
    0,
]

In [10]:
def walkforward(trend_type, seasonal_type, damped_trend, init_method, use_boxcox, debug=False):
    """Return the mean walk-forward MSE of one Holt-Winters configuration.

    Relies on the notebook-level ``df``, ``h`` and ``Ntest``. For every cutoff
    from ``Ntest`` through ``len(df) - h`` an ``ExponentialSmoothing`` model is
    fitted on the rows before the cutoff and its ``h``-step forecast is scored
    against the ``h`` rows that follow it.

    Parameters
    ----------
    trend_type
        Forwarded to the model as ``trend``.
    seasonal_type
        Forwarded to the model as ``seasonal``.
    damped_trend
        Forwarded to the model as ``damped_trend``.
    init_method
        Forwarded to the model as ``initialization_method``.
    use_boxcox
        Forwarded to the model as ``use_boxcox``.
    debug : bool, default False
        When true, print after each fold whether the end of ``df`` has been
        reached and how many folds have been completed.

    Returns
    -------
    The ``np.mean`` of the per-fold mean squared errors.
    """
    fold_errors = []
    seen_last = False

    cutoffs = range(Ntest, len(df) - h + 1)
    for steps_completed, end_of_train in enumerate(cutoffs, start=1):
        # Slice the shared frame positionally instead of building up a
        # separate, growing training set.
        train = df.iloc[:end_of_train]
        test = df.iloc[end_of_train:end_of_train + h]

        # Sticky flag: set once a test window ends on the final row of df.
        reached_end = test.index[-1] == df.index[-1]
        if reached_end:
            seen_last = True

        model = ExponentialSmoothing(
            train["Passengers"],
            initialization_method=init_method,
            trend=trend_type,
            damped_trend=damped_trend,
            seasonal=seasonal_type,
            seasonal_periods=12,
            use_boxcox=use_boxcox,
        )

        # Score the h-step-ahead forecast against the held-out window.
        forecast = model.fit().forecast(h)
        fold_errors.append(mean_squared_error(test["Passengers"], forecast))

        if debug:
            print(f"seen_last : {seen_last}")
            print(f"steps completed : {steps_completed}")

    return np.mean(fold_errors)

In [12]:
# Trial run of a single configuration with per-fold debug output enabled.
walkforward(
    trend_type="add",
    seasonal_type="add",
    damped_trend=False,
    init_method="legacy-heuristic",
    use_boxcox=0,
    debug=True,
)

seen_last : False
steps completed : 1
seen_last : False
steps completed : 2
seen_last : False
steps completed : 3
seen_last : False
steps completed : 4
seen_last : False
steps completed : 5
seen_last : False
steps completed : 6
seen_last : False
steps completed : 7
seen_last : False
steps completed : 8
seen_last : False
steps completed : 9
seen_last : True
steps completed : 10


1448.5322380644711

In [13]:
# One entry per walkforward argument, in the order the function expects them.
tuple_of_option_lists = (
    trend_type_list,
    seasonal_type_list,
    damped_trend_list,
    init_method_list,
    use_boxcox_list,
)

# Show every combination the grid search will visit.
for option_combo in itertools.product(*tuple_of_option_lists):
    print(option_combo)

('add', 'add', True, 'estimated', True)
('add', 'add', True, 'estimated', False)
('add', 'add', True, 'estimated', 0)
('add', 'add', True, 'heuristic', True)
('add', 'add', True, 'heuristic', False)
('add', 'add', True, 'heuristic', 0)
('add', 'add', True, 'legacy-heuristic', True)
('add', 'add', True, 'legacy-heuristic', False)
('add', 'add', True, 'legacy-heuristic', 0)
('add', 'add', False, 'estimated', True)
('add', 'add', False, 'estimated', False)
('add', 'add', False, 'estimated', 0)
('add', 'add', False, 'heuristic', True)
('add', 'add', False, 'heuristic', False)
('add', 'add', False, 'heuristic', 0)
('add', 'add', False, 'legacy-heuristic', True)
('add', 'add', False, 'legacy-heuristic', False)
('add', 'add', False, 'legacy-heuristic', 0)
('add', 'mul', True, 'estimated', True)
('add', 'mul', True, 'estimated', False)
('add', 'mul', True, 'estimated', 0)
('add', 'mul', True, 'heuristic', True)
('add', 'mul', True, 'heuristic', False)
('add', 'mul', True, 'heuristic', 0)
('add

In [15]:
# Exhaustive search: keep the configuration with the lowest walk-forward MSE.
best_score, best_options = float("inf"), None

for candidate in itertools.product(*tuple_of_option_lists):
    score = walkforward(*candidate)

    # Written as `not (a < b)` rather than `a >= b` so that a NaN score is
    # skipped instead of being recorded as the best.
    if not (score < best_score):
        continue

    best_score, best_options = score, candidate
    print(f"best score so far : {best_score}, with options : {best_options}")


best score so far : 412.8172176010753, with options : ('add', 'add', True, 'estimated', True)
best score so far : 397.5872318721034, with options : ('add', 'add', True, 'heuristic', True)
best score so far : 368.78748763006257, with options : ('add', 'add', True, 'legacy-heuristic', True)
best score so far : 320.6640288427434, with options : ('add', 'add', False, 'estimated', True)
best score so far : 308.1359664473358, with options : ('add', 'add', False, 'legacy-heuristic', True)


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


best score so far : 302.51546652792945, with options : ('mul', 'add', False, 'estimated', False)
best score so far : 299.82254446452646, with options : ('mul', 'add', False, 'heuristic', True)
best score so far : 261.88562598395254, with options : ('mul', 'add', False, 'heuristic', False)
best score so far : 249.5890588815454, with options : ('mul', 'add', False, 'legacy-heuristic', False)


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


In [17]:
# Report the winning configuration found by the grid search.
print(f"best score is {best_score}")

# Unpack in the same order as tuple_of_option_lists / walkforward's arguments.
(
    trend_type,
    seasonal_type,
    damped_trend,
    init_method,
    use_boxcox,
) = best_options

print(
    f"trend type : {trend_type}",
    f"seasonal type : {seasonal_type}",
    f"damped_trend : {damped_trend}",
    f"initialization_method : {init_method}",
    f"use boxcox : {use_boxcox}",
    sep="\n",
)

best score is 249.5890588815454
trend type : mul
seasonal type : add
damped_trend : False
initialization_method : legacy-heuristic
use boxcox : False
