<a href="https://colab.research.google.com/github/reinhardbuyabo/ICS4102/blob/main/Walk_Forward_Validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Fetch the airline passengers CSV; wget's -nc (no-clobber) flag skips the
# download when the file is already present, so re-running this cell is cheap.
!wget -nc https://lazyprogrammer.me/course_files/airline_passengers.csv

--2025-09-23 12:12:35--  https://lazyprogrammer.me/course_files/airline_passengers.csv
Resolving lazyprogrammer.me (lazyprogrammer.me)... 104.21.23.210, 172.67.213.166, 2606:4700:3030::ac43:d5a6, ...
Connecting to lazyprogrammer.me (lazyprogrammer.me)|104.21.23.210|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2036 (2.0K) [text/csv]
Saving to: ‘airline_passengers.csv’


2025-09-23 12:12:35 (36.0 MB/s) - ‘airline_passengers.csv’ saved [2036/2036]



In [2]:
!pip install -U statsmodels



In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import itertools

from sklearn.metrics import mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [4]:
# Load the downloaded CSV; the 'Month' column is parsed as dates and used as the index.
df = pd.read_csv('airline_passengers.csv', index_col='Month', parse_dates=True)

In [5]:
# Set the index frequency explicitly to 'MS' (pandas' month-start alias).
df.index.freq = 'MS'

In [6]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(144, 1)

In [24]:
# Walk-forward validation settings.
h = 12      # forecast horizon: number of rows forecast and scored in each round
steps = 10  # number of walk-forward rounds

# Row position at which the first training window ends. Despite the name this
# is not a test-set size: the validation loop starts its training cut-off here
# and advances it one row per round until the test window reaches the end of df.
Ntest = len(df) - (h + steps) + 1

In [19]:
# Candidate values for each Holt-Winters option explored by the grid search.
trend_type_list = [
    'add',
    'mul',
]
seasonal_type_list = [
    'add',
    'mul',
]
damped_trend_list = [
    True,
    False,
]
init_method = [
    'estimated',
    'heuristic',
    'legacy-heuristic',
]
use_boxcox_list = [
    True,
    False,
    0,  # lambda=0 corresponds to log transform
]

In [29]:
def walkforward(
    trend_type,
    seasonal_type,
    damped_trend,
    init_method,
    use_boxcox,
    debug=False,
    data=None,
    horizon=None,
    first_train_end=None,
):
  """Score one Holt-Winters configuration with walk-forward validation.

  For every training cut-off from ``first_train_end`` up to
  ``len(data) - horizon`` (inclusive), an ``ExponentialSmoothing`` model is
  fitted on the rows before the cut-off, the next ``horizon`` rows are
  forecast, and the mean squared error against the held-out rows is recorded.

  Parameters
  ----------
  trend_type, seasonal_type, damped_trend, init_method, use_boxcox
      Passed to ``ExponentialSmoothing`` as ``trend``, ``seasonal``,
      ``damped_trend``, ``initialization_method`` and ``use_boxcox``.
  debug : bool, default False
      When true, print whether the last test window reached the final row of
      ``data`` and how many rounds were run.
  data : DataFrame, optional
      Frame with a ``'Passengers'`` column. Defaults to the notebook-level
      ``df``.
  horizon : int, optional
      Rows forecast per round. Defaults to the notebook-level ``h``.
  first_train_end : int, optional
      Row position where the first training window ends. Defaults to the
      notebook-level ``Ntest``.

  Returns
  -------
  The mean of the per-round mean squared errors (``np.mean`` of the list).

  Raises
  ------
  ValueError
      If the settings leave no round to run, or the first training window
      would be empty.
  """
  # Fall back to the notebook-level settings so existing calls behave as before.
  if data is None:
    data = df
  if horizon is None:
    horizon = h
  if first_train_end is None:
    first_train_end = Ntest

  # Last cut-off that still leaves a full `horizon` rows for testing.
  last_train_end = len(data) - horizon

  # Fail loudly instead of returning nan from np.mean([]) or slicing the
  # training window with a non-positive bound (which would wrap around).
  if first_train_end < 1 or first_train_end > last_train_end:
    raise ValueError(
        f"no valid walk-forward rounds: first_train_end={first_train_end}, "
        f"last allowed train end={last_train_end}"
    )

  # store errors from each round
  errors = []
  seen_last = False
  steps_completed = 0

  for end_of_train in range(first_train_end, last_train_end + 1):
    train = data.iloc[:end_of_train]
    test = data.iloc[end_of_train:end_of_train + horizon]

    # Sanity flag: did some test window end on the final observation?
    if test.index[-1] == data.index[-1]:
      seen_last = True

    steps_completed += 1

    hw = ExponentialSmoothing(
        train['Passengers'],
        initialization_method=init_method,
        trend=trend_type,
        damped_trend=damped_trend,
        seasonal=seasonal_type,
        seasonal_periods=12,
        use_boxcox=use_boxcox
    )
    res_hw = hw.fit()

    # compute error for the forecast horizon
    fcast = res_hw.forecast(horizon)
    error = mean_squared_error(test['Passengers'], fcast)
    errors.append(error)

  if debug:
    print("seen last:", seen_last)
    print("steps completed:", steps_completed)

  return np.mean(errors)

In [30]:
# Single debug run: with debug=True the function prints whether the final test
# window reached the last row of df and how many rounds were completed.
walkforward('add', 'add', False, 'legacy-heuristic', 0, debug=True)

seen last: True
steps completed: 10


np.float64(1448.227487951414)

In [31]:
# Option lists in the same order as walkforward's positional parameters; the
# grid search unpacks each combination straight into walkforward(*x), so the
# order here must not change.
tuple_of_option_lists = (
    trend_type_list,
    seasonal_type_list,
    damped_trend_list,
    init_method,
    use_boxcox_list
)
# Preview every combination that itertools.product will generate.
for x in itertools.product(*tuple_of_option_lists):
  print(x)

('add', 'add', True, 'estimated', True)
('add', 'add', True, 'estimated', False)
('add', 'add', True, 'estimated', 0)
('add', 'add', True, 'heuristic', True)
('add', 'add', True, 'heuristic', False)
('add', 'add', True, 'heuristic', 0)
('add', 'add', True, 'legacy-heuristic', True)
('add', 'add', True, 'legacy-heuristic', False)
('add', 'add', True, 'legacy-heuristic', 0)
('add', 'add', False, 'estimated', True)
('add', 'add', False, 'estimated', False)
('add', 'add', False, 'estimated', 0)
('add', 'add', False, 'heuristic', True)
('add', 'add', False, 'heuristic', False)
('add', 'add', False, 'heuristic', 0)
('add', 'add', False, 'legacy-heuristic', True)
('add', 'add', False, 'legacy-heuristic', False)
('add', 'add', False, 'legacy-heuristic', 0)
('add', 'mul', True, 'estimated', True)
('add', 'mul', True, 'estimated', False)
('add', 'mul', True, 'estimated', 0)
('add', 'mul', True, 'heuristic', True)
('add', 'mul', True, 'heuristic', False)
('add', 'mul', True, 'heuristic', 0)
('add

In [32]:
# Exhaustive search: score every option combination and keep the lowest one.
best_score, best_options = float("inf"), None
for x in itertools.product(*tuple_of_option_lists):
  score = walkforward(*x)
  # Written as `not <` so a nan score is skipped, exactly like a plain
  # `if score < best_score` test would skip it.
  if not score < best_score:
    continue
  print("Best so far:", score, x)
  best_score, best_options = score, x

Best so far: 412.70439433481533 ('add', 'add', True, 'estimated', True)
Best so far: 397.51642337228634 ('add', 'add', True, 'heuristic', True)
Best so far: 368.8467123043263 ('add', 'add', True, 'legacy-heuristic', True)
Best so far: 320.68497356823144 ('add', 'add', False, 'estimated', True)
Best so far: 307.9434782402395 ('add', 'add', False, 'legacy-heuristic', True)
Best so far: 300.95995233097807 ('mul', 'add', True, 'legacy-heuristic', False)
Best so far: 255.99878409662014 ('mul', 'add', False, 'estimated', False)
Best so far: 249.58266478996705 ('mul', 'add', False, 'legacy-heuristic', False)


In [35]:
# Report the winning score and the option values that produced it.
print("best score:", best_score)

# NOTE: this unpacking rebinds `init_method`, which the option-list cell defines
# as the list of candidate initialization methods. Re-run that cell before
# rebuilding tuple_of_option_lists, otherwise the grid sees a plain string.
trend_type, seasonal_type, damped_trend, init_method, use_boxcox = best_options
print(
    *(
        f"{label} {value}"
        for label, value in zip(
            (
                "trend_type:",
                "seasonal_type:",
                "damped_trend:",
                "init_method:",
                "use_boxcox:",
            ),
            best_options,
        )
    ),
    sep="\n",
)

best score: 249.58266478996705
trend_type: mul
seasonal_type: add
damped_trend: False
init_method: legacy-heuristic
use_boxcox: False
