Theta model 

Steps:
- Test for seasonality
- Deseasonalize if needed
- Estimate alpha by fitting a simple exponential smoothing (SES) model, and beta (the trend slope) with OLS
- Forecast series
- Reseasonalize if needed

In [None]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import seaborn as sns
from statsmodels.tsa.forecasting.theta import ThetaModel

# Notebook-wide matplotlib defaults: figure size, font size and line width.
# Each entry is applied through plt.rc(group, **settings).
for rc_group, rc_settings in (
    ("figure", {"figsize": (16, 8)}),
    ("font", {"size": 15}),
    ("lines", {"linewidth": 3}),
):
    plt.rc(rc_group, **rc_settings)

# Seaborn theme is applied after the matplotlib defaults.
sns.set_style("darkgrid")

Data

In [None]:
# Download the FRED series HOUST (housing starts) for 1980-01-01 .. 2020-04-01.
reader = pdr.fred.FredReader(
    ["HOUST"], start="1980-01-01", end="2020-04-01"
)
data = reader.read()
housing = data.HOUST
# Attach the frequency pandas infers from the dates to the index.
# NOTE(review): presumably needed so ThetaModel can work out the seasonal
# period and build a dated forecast index -- confirm against statsmodels docs.
housing.index.freq = housing.index.inferred_freq

# Plot the raw series. The cell previously ended with an argument-less
# plt.plot(), which draws nothing; plt.show() is what renders the figure.
fig, ax = plt.subplots()
ax.plot(housing)
ax.set_xlabel("DATE")
plt.show()

Fit theta model

In [None]:
# Build a Theta model on the full housing series; no constructor options
# are overridden here.
tm = ThetaModel(housing)
# Fit it and keep the fitted-results object for inspection.
result = tm.fit()

# Print the text form of the fit summary.
print(result.summary())

Hedgehog plot of forecast

In [None]:
# Number of steps forecast from every origin, and the forecast origins
# themselves: every second year from 1990 up to (not including) 2020.
forecast_period = 24
year_range = [str(origin) for origin in range(1990, 2020, 2)]

# Collect the observed series plus one forecast column per origin year.
forecasts = {"housing": housing}
for year in year_range:
    # Label-based slice: all observations up to and including `year`.
    history = housing.loc[:year]
    results = ThetaModel(history).fit()
    forecasts[year] = results.forecast(forecast_period)

# Assemble the collected series into a single frame, one column per key.
forecasts = pd.DataFrame(forecasts)

In [None]:
# Hedgehog chart: the observed series in grey from the first forecast origin
# onwards, with one forecast line per origin year drawn on top.
fig, ax = plt.subplots()
first_origin = year_range[0]
ax.plot(housing[first_origin:], color='grey')
for origin in year_range:
    ax.plot(forecasts[[origin]])
ax.set_title("Housing Starts")
fig.tight_layout(pad=1.0)
plt.show()

Personal consumption expenditures (PCE) data

In [None]:
# Download the FRED series NA000349Q for 1980-01-01 .. 2020-04-01 and give
# its single column the short name "PCE".
reader = pdr.fred.FredReader(
    ["NA000349Q"], start="1980-01-01", end="2020-04-01"
)
pce = reader.read()
pce.columns = ["PCE"]
# The index frequency is set explicitly here rather than inferred.
pce.index.freq = "QS-OCT"

# Quick look at the first rows.
print(pce.head())

# Plot the raw series.
fig, ax = plt.subplots()
ax.plot(pce, label="PCE")
ax.legend()
ax.set_xlabel("DATE")
plt.show()

In [None]:
# model ln since series is always positive
# (the natural log is taken element-wise over the whole PCE frame)
pce_log = np.log(pce)
# `tm` and `result` are rebound here: from this point on they refer to the
# log-PCE model, not the housing model fitted earlier in the notebook.
tm = ThetaModel(pce_log)
result = tm.fit()

# Print the text form of the fit summary.
print(result.summary())

In [None]:
# exploring different theta values
theta_range = [1, 2, 3, 4, np.inf]

# One 12-step forecast per theta, each named explicitly as `theta_<value>`
# (np.inf formats as "theta_inf"). Naming the series directly avoids relying
# on the forecast being called 'forecast' and on a blanket column rename.
theta_forecasts = [
    result.forecast(12, theta=theta).rename(f'theta_{theta}')
    for theta in theta_range
]

# Join the log series (as `pce_ln`) and all forecasts in a single concat
# instead of growing the frame one column at a time inside the loop.
forecasts = pd.concat(
    [pce_log.rename(columns={'PCE': 'pce_ln'}), *theta_forecasts], axis=1
)

In [None]:
# Compare the forecasts produced by each theta against the log series.
# tight_layout is requested when the figure is created.
fig, ax = plt.subplots(dpi=100, tight_layout=True)
ax.plot(pce_log, color="grey", label="data")
for theta in theta_range:
    ax.plot(forecasts[f"theta_{theta}"], label=f"theta={theta}")
ax.set_title("PCE")

# Zoom in on the last 36 rows of the combined frame and a fixed y-window.
ax.set_xlim(forecasts.index[-36], forecasts.index[-1])
ax.set_ylim(14.9, 15.2)
ax.legend()
plt.show()

In [None]:
# Plot a 24-step forecast (theta=2) from the log-PCE model fitted above.
# This previously used `results`, which is the last housing model left over
# from the hedgehog loop, not the PCE model this section is about.
# The return value is bound to `_` only to keep its repr out of the output.
_ = result.plot_predict(24, theta=2)