In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
!curl -O  https://lazyprogrammer.me/course_files/airline_passengers.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  2036  100  2036    0     0   1793      0  0:00:01  0:00:01 --:--:--  1798
100  2036  100  2036    0     0   1792      0  0:00:01  0:00:01 --:--:--  1798


In [3]:
df=pd.read_csv('airline_passengers.csv',index_col='Month',parse_dates=True)
df.head()

Unnamed: 0_level_0,Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121


In [4]:
import itertools

from sklearn.metrics import mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [5]:
df.index.freq='MS'

In [6]:
df.shape

(144, 1)

In [7]:
# Assume the forecast horizon we care about is 12
# Validation over 10 steps
h=12
steps=10
Ntest=len(df)-h-steps+1

In [8]:
# Configuration hyperparameters to try
trend_type_list=['add','mul']
seasonal_type_list=['add','mul']
damped_trend_list=[True,False]
init_method_list=['estimated','heuristic','legacy-heuristic']
use_boxcox_list=[True,False,0]

In [9]:
def walkforward(
trend_type,
seasonal_type,
damped_trend,
init_method,
use_boxcox,
debug=False):
    
    # Store errors
    errors=[]
    seen_last=False
    steps_completed=0
    
    for end_of_train in range(Ntest,len(df)-h+1):
        # We don't have to manually 'add' data to our dataset
        # just index it at the right points- this is view and not a copy
        # So it doesn't take up any extra space or computation
        train=df.iloc[:end_of_train]
        test =df.iloc[end_of_train:end_of_train+h]
        
        if test.index[-1]==df.index[-1]:
            seen_last=True
        
        steps_completed +=1
        
    hw = ExponentialSmoothing(
    train['Passengers'],
    initialization_method=init_method,
    trend=trend_type,
    damped_trend=damped_trend,
    seasonal=seasonal_type,
    seasonal_periods=12,
    use_boxcox=use_boxcox)
    
    res_hw=hw.fit()
    
    # Compute error for the forecast horizon
    fcast=res_hw.forecast(h)
    error=mean_squared_error(test['Passengers'],fcast)
    errors.append(error)
    
    if debug:
        print('seen_last:',seen_last)
        print('steps completed:',steps_completed)
        
    return np.mean(errors)

In [10]:
# test our function
walkforward('add','add',False,'legacy-heuristic',0,debug=True)

seen_last: True
steps completed: 10


708.2967560540783

In [11]:
# iterate through all possible options(i.e. grid search)
tuple_of_option_lists=(
trend_type_list,
seasonal_type_list,
damped_trend_list,
init_method_list,
use_boxcox_list,
)
for x in itertools.product(*tuple_of_option_lists):
    print(x)

('add', 'add', True, 'estimated', True)
('add', 'add', True, 'estimated', False)
('add', 'add', True, 'estimated', 0)
('add', 'add', True, 'heuristic', True)
('add', 'add', True, 'heuristic', False)
('add', 'add', True, 'heuristic', 0)
('add', 'add', True, 'legacy-heuristic', True)
('add', 'add', True, 'legacy-heuristic', False)
('add', 'add', True, 'legacy-heuristic', 0)
('add', 'add', False, 'estimated', True)
('add', 'add', False, 'estimated', False)
('add', 'add', False, 'estimated', 0)
('add', 'add', False, 'heuristic', True)
('add', 'add', False, 'heuristic', False)
('add', 'add', False, 'heuristic', 0)
('add', 'add', False, 'legacy-heuristic', True)
('add', 'add', False, 'legacy-heuristic', False)
('add', 'add', False, 'legacy-heuristic', 0)
('add', 'mul', True, 'estimated', True)
('add', 'mul', True, 'estimated', False)
('add', 'mul', True, 'estimated', 0)
('add', 'mul', True, 'heuristic', True)
('add', 'mul', True, 'heuristic', False)
('add', 'mul', True, 'heuristic', 0)
('add

In [12]:
best_score=float('inf')
best_options=None
for x in itertools.product(*tuple_of_option_lists):
    score=walkforward(*x)
    
    if score<best_score:
        print('Best Score so Far:',score)
        best_score=score
        best_options=x
        
## Overflow is not an issue

Best Score so Far: 265.3716003370259
Best Score so Far: 251.33118439083964
Best Score so Far: 237.53660728440934


  return err.T @ err
  return err.T @ err


In [14]:
print('best score:',best_score)
trend_type,seasonal_type,damped_trend,init_method,use_boxcox=best_options
print('trend_type:',trend_type)
print('seasonal_type:',seasonal_type)
print('damped_trend:',damped_trend)
print('init_method:',init_method)
print('use_boxcox:',use_boxcox)

best score: 237.53660728440934
trend_type: add
seasonal_type: add
damped_trend: True
init_method: heuristic
use_boxcox: False
