# Baseline Models: Time Series Forecasting

In [2]:
# Remote CSV holding the monthly airline passengers series; read below with pd.read_csv.
path = "https://frenzy86.s3.eu-west-2.amazonaws.com/timeseries/Data/airline_passengers.csv"

In [8]:
import pandas as pd
import numpy as np

# Silence every warning category for the rest of the session. Note that this
# also hides deprecation and numerical warnings raised by pandas/numpy.
import warnings
warnings.filterwarnings('ignore')

# Alternative local source (left disabled): path = 'airline-passengers.csv'

# Read the CSV with "Month" parsed as dates and used as the index, then
# declare an explicit month-start ('MS') frequency on that index.
df_passengers = (
    pd.read_csv(path, parse_dates=["Month"], index_col="Month")
    .asfreq('MS')
)
df_passengers

Unnamed: 0_level_0,Thousands of Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121
...,...
1960-08-01,606
1960-09-01,508
1960-10-01,461
1960-11-01,390


In [15]:
# Replace the original 'Thousands of Passengers' label with the shorter
# 'Passengers'; the values themselves are unchanged.
df_passengers.columns = ['Passengers']

In [16]:
# Inspect the index to confirm it is a DatetimeIndex carrying the 'MS' frequency.
df_passengers.index

DatetimeIndex(['1949-01-01', '1949-02-01', '1949-03-01', '1949-04-01',
               '1949-05-01', '1949-06-01', '1949-07-01', '1949-08-01',
               '1949-09-01', '1949-10-01',
               ...
               '1960-03-01', '1960-04-01', '1960-05-01', '1960-06-01',
               '1960-07-01', '1960-08-01', '1960-09-01', '1960-10-01',
               '1960-11-01', '1960-12-01'],
              dtype='datetime64[ns]', name='Month', length=144, freq='MS')

## Create DataFrame for storing forecasts

In [17]:
# Number of monthly steps to forecast.
horizon = 48  # Next four years (12 months x 4 years)

# Last timestamp of the historical index; the forecast date range is anchored on it.
date_end_raw = df_passengers.index[-1]
date_end_raw

Timestamp('1960-12-01 00:00:00')

In [18]:
# Build horizon + 1 month-start stamps beginning at the last observed month,
# then drop the first stamp so the forecast index starts one month after the
# historical data ends.
date_range = pd.date_range(
    start=date_end_raw,
    periods=horizon + 1,
    freq='MS',
)[1:]
date_range

DatetimeIndex(['1961-01-01', '1961-02-01', '1961-03-01', '1961-04-01',
               '1961-05-01', '1961-06-01', '1961-07-01', '1961-08-01',
               '1961-09-01', '1961-10-01', '1961-11-01', '1961-12-01',
               '1962-01-01', '1962-02-01', '1962-03-01', '1962-04-01',
               '1962-05-01', '1962-06-01', '1962-07-01', '1962-08-01',
               '1962-09-01', '1962-10-01', '1962-11-01', '1962-12-01',
               '1963-01-01', '1963-02-01', '1963-03-01', '1963-04-01',
               '1963-05-01', '1963-06-01', '1963-07-01', '1963-08-01',
               '1963-09-01', '1963-10-01', '1963-11-01', '1963-12-01',
               '1964-01-01', '1964-02-01', '1964-03-01', '1964-04-01',
               '1964-05-01', '1964-06-01', '1964-07-01', '1964-08-01',
               '1964-09-01', '1964-10-01', '1964-11-01', '1964-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [19]:
# Forecast container indexed by the future months. 'Passengers' is filled with
# NaN so the frame shares that column with the historical data when the two
# are concatenated for plotting.
df_forecast = pd.DataFrame({'Passengers': np.nan}, index=date_range)
df_forecast

Unnamed: 0,Passengers
1961-01-01,
1961-02-01,
1961-03-01,
1961-04-01,
1961-05-01,
1961-06-01,
1961-07-01,
1961-08-01,
1961-09-01,
1961-10-01,


## Prepare Historical Data

In [20]:
# Raw NumPy array of the historical values, oldest first; this is the input
# passed to the baseline forecast functions below.
data = df_passengers['Passengers'].to_numpy()
data

array([112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118, 115,
       126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140, 145, 150,
       178, 163, 172, 178, 199, 199, 184, 162, 146, 166, 171, 180, 193,
       181, 183, 218, 230, 242, 209, 191, 172, 194, 196, 196, 236, 235,
       229, 243, 264, 272, 237, 211, 180, 201, 204, 188, 235, 227, 234,
       264, 302, 293, 259, 229, 203, 229, 242, 233, 267, 269, 270, 315,
       364, 347, 312, 274, 237, 278, 284, 277, 317, 313, 318, 374, 413,
       405, 355, 306, 271, 306, 315, 301, 356, 348, 355, 422, 465, 467,
       404, 347, 305, 336, 340, 318, 362, 348, 363, 435, 491, 505, 404,
       359, 310, 337, 360, 342, 406, 396, 420, 472, 548, 559, 463, 407,
       362, 405, 417, 391, 419, 461, 472, 535, 622, 606, 508, 461, 390,
       432])

In [21]:
# Route DataFrame.plot() through plotly instead of the default backend.
pd.set_option("plotting.backend", "plotly")

## Last Observation Method

In [22]:
def last_observation_forecast(data, horizon):
    """Naive forecast: repeat the most recent observation.

    Parameters
    ----------
    data : sequence
        Historical observations, oldest first; must be non-empty.
    horizon : int
        Number of steps to forecast.

    Returns
    -------
    list
        ``horizon`` copies of the final element of ``data``.
    """
    # Read the final value up front so an empty input fails even when horizon is 0.
    most_recent = data[-1]
    return [most_recent for _ in range(horizon)]

# Store the naive forecast as its own column, aligned with the forecast index.
df_forecast['Last Observation'] = last_observation_forecast(data, horizon)

# Stack history and forecasts into one frame so both are drawn on a shared time axis.
pd.concat([df_passengers, df_forecast]).plot()

## Seasonal Naive Method

In [24]:
def seasonal_naive_forecast(data, horizon, seasonality):
    """Seasonal naive forecast: cycle through the last full season.

    Parameters
    ----------
    data : sequence
        Historical observations, oldest first; needs at least
        ``seasonality`` elements.
    horizon : int
        Number of steps to forecast.
    seasonality : int
        Length of one seasonal cycle, in observations.

    Returns
    -------
    list
        ``horizon`` values, where step ``k`` repeats the observation at the
        same position within the final ``seasonality`` elements of ``data``.
    """
    # A negative index walks from -seasonality up to -1, then wraps around.
    return [
        data[-seasonality + (step % seasonality)]
        for step in range(horizon)
    ]

# seasonality=12: the series is monthly, so a 12-step cycle corresponds to one year.
df_forecast['Seasonal Naive'] = seasonal_naive_forecast(data, horizon, seasonality=12)

# Plot history together with every forecast column produced so far.
pd.concat([df_passengers, df_forecast]).plot()

## Moving Average Method

In [25]:
def moving_average_forecast(data, window_size, horizon):
    """Recursive moving-average forecast.

    Each forecast step is the mean of the most recent ``window_size`` values,
    where "most recent" includes forecasts already produced, so later steps
    are averages that partly or wholly consist of earlier forecasts.

    Parameters
    ----------
    data : array-like of numbers
        Historical observations, oldest first. Lists and NumPy arrays are
        both accepted.
    window_size : int
        Number of trailing values to average; must be at least 1. When fewer
        than ``window_size`` values are available, all of them are averaged.
    horizon : int
        Number of steps to forecast; must be non-negative.

    Returns
    -------
    numpy.ndarray
        Float array of length ``horizon`` (empty when ``horizon`` is 0).

    Raises
    ------
    ValueError
        If ``window_size`` < 1, ``horizon`` < 0, or ``data`` is empty.
    """
    if window_size < 1:
        # A zero or negative window would silently slice the wrong span.
        raise ValueError("window_size must be at least 1")
    if horizon < 0:
        raise ValueError("horizon must be non-negative")

    history = np.asarray(data, dtype=float)
    if history.size == 0:
        raise ValueError("data must contain at least one observation")

    # Only the trailing window can influence any forecast, so seed a
    # preallocated buffer with it instead of re-concatenating the full
    # history on every step.
    seed = history[-window_size:]
    n_seed = seed.size
    buffer = np.empty(n_seed + horizon, dtype=float)
    buffer[:n_seed] = seed

    for step in range(horizon):
        end = n_seed + step
        start = max(0, end - window_size)
        buffer[end] = buffer[start:end].mean()

    # Slice from the seed offset rather than with [-horizon:], which would
    # return the whole array when horizon == 0.
    return buffer[n_seed:]

# Store the 12-step recursive moving-average forecast alongside the other baselines.
df_forecast['Moving Average'] = moving_average_forecast(
    data,
    window_size=12,
    horizon=horizon,
)

# Final comparison figure: history plus every baseline, with labelled axes.
fig = (
    pd.concat([df_passengers, df_forecast])
    .plot(title='Baseline Models')
    .update_layout(xaxis_title='Time (months)', yaxis_title='Passengers')
)
fig

In [26]:
# Display the historical frame (a single 'Passengers' column indexed by month).
df_passengers

Unnamed: 0_level_0,Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121
...,...
1960-08-01,606
1960-09-01,508
1960-10-01,461
1960-11-01,390


In [27]:
# Display the forecast frame: the empty 'Passengers' placeholder plus one column per baseline.
df_forecast

Unnamed: 0,Passengers,Last Observation,Seasonal Naive,Moving Average
1961-01-01,,432,417,476.166667
1961-02-01,,432,391,481.097222
1961-03-01,,432,419,488.605324
1961-04-01,,432,461,494.405768
1961-05-01,,432,472,497.189582
1961-06-01,,432,535,499.288714
1961-07-01,,432,622,496.312773
1961-08-01,,432,606,485.838837
1961-09-01,,432,508,475.825407
1961-10-01,,432,461,473.144191
