# Baseline models for time series forecasting

In [1]:

from modules import utils
utils.configure_plotly_template(showlegend=True)

## Data

In [2]:
import pandas as pd

df = pd.read_parquet('../../../data/statsmodels/AirPassengers.parquet').asfreq('ME')
df.columns = ['values']

df

Unnamed: 0,values
1949-01-31,112
1949-02-28,118
...,...
1960-11-30,390
1960-12-31,432


In [3]:
series = df['values'].values
series

array([112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118, 115,
       126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140, 145, 150,
       178, 163, 172, 178, 199, 199, 184, 162, 146, 166, 171, 180, 193,
       181, 183, 218, 230, 242, 209, 191, 172, 194, 196, 196, 236, 235,
       229, 243, 264, 272, 237, 211, 180, 201, 204, 188, 235, 227, 234,
       264, 302, 293, 259, 229, 203, 229, 242, 233, 267, 269, 270, 315,
       364, 347, 312, 274, 237, 278, 284, 277, 317, 313, 318, 374, 413,
       405, 355, 306, 271, 306, 315, 301, 356, 348, 355, 422, 465, 467,
       404, 347, 305, 336, 340, 318, 362, 348, 363, 435, 491, 505, 404,
       359, 310, 337, 360, 342, 406, 396, 420, 472, 548, 559, 463, 407,
       362, 405, 417, 391, 419, 461, 472, 535, 622, 606, 508, 461, 390,
       432])

## Prepare DataFrame for forecasts

In [4]:
HORIZON = 12 * 4

start = df.index.max() + pd.DateOffset(months=1)
end = start + pd.DateOffset(months=HORIZON)

start, end

(Timestamp('1961-01-31 00:00:00'), Timestamp('1965-01-31 00:00:00'))

In [5]:
df_forecast = pd.DataFrame(index=pd.date_range(start=start, end=end, freq="MS"))
df_forecast

1961-02-01
1961-03-01
...
1964-12-01
1965-01-01


## Last observation method

In [6]:
df_forecast["last_observation"] = series[-1]
df_forecast

Unnamed: 0,last_observation
1961-02-01,432
1961-03-01,432
...,...
1964-12-01,432
1965-01-01,432


In [7]:
pd.concat([df, df_forecast]).plot()

## Seasonal naive method

In [8]:
HORIZON = 48
seasonality = 12

last_season = series[-seasonality:]

forecasts = []
for step in range(HORIZON):
    forecast = last_season[step % seasonality]
    forecasts.append(forecast)

idx = df_forecast.index[:HORIZON]
df_forecast.loc[idx, "seasonal_naive"] = forecasts

df_forecast

Unnamed: 0,last_observation,seasonal_naive
1961-02-01,432,417.0
1961-03-01,432,391.0
...,...,...
1964-12-01,432,390.0
1965-01-01,432,432.0


In [9]:
pd.concat([df, df_forecast]).plot()

## Moving average method

In [10]:
import numpy as np

In [13]:
HORIZON = 48
window_size = 12

series_forecast = series.copy()

forecasts = []

for step in range(HORIZON):
    forecast = series_forecast[-window_size:].mean()
    forecasts.append(forecast)
    series_forecast = np.concatenate([series_forecast, [forecast]])

idx = df_forecast.index[:HORIZON]
df_forecast.loc[idx, "moving_average"] = forecasts

df_forecast

Unnamed: 0,last_observation,seasonal_naive,moving_average
1961-02-01,432,417.0,476.166667
1961-03-01,432,391.0,481.097222
...,...,...,...
1964-12-01,432,390.0,483.712156
1965-01-01,432,432.0,483.707130


In [12]:
pd.concat([df, df_forecast]).plot()