In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.seasonal import MSTL
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.graphics.tsaplots import plot_acf

# Exponential Smoothing

In [None]:
# Counter site to analyse: the dataset is filtered to rows whose "location"
# column equals this value, and it is reused as the title of the plots.
LOCATION = "Nelson St"

In [None]:
# Load the raw counts (parsing "time" as datetimes) and keep only the rows
# for the configured location.
cycle_counts = pd.read_csv("cycle_counts.csv", parse_dates=["time"])

# `.copy()` detaches the filtered rows from the full table so the frame can be
# modified later without pandas raising SettingWithCopyWarning.
cycle_counts = cycle_counts[cycle_counts["location"] == LOCATION].copy()

# Fail early with a clear message instead of an obscure error further down.
assert not cycle_counts.empty, f"No rows found for location {LOCATION!r}"

In [None]:
# Quick look at the raw series for the selected location.
fig, ax = plt.subplots()
ax.plot(cycle_counts["time"], cycle_counts["count"])
ax.set_title(LOCATION)
ax.set_ylabel("Count")
# Tilt the date labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout();

## STL Decomposition

In [None]:
# Aggregate the readings to one row per calendar day, indexed by date.
#
# The transformation is written as a single non-mutating chain (no column
# assignment on the filtered frame, which would raise SettingWithCopyWarning)
# and is guarded so that re-running the cell is a no-op: after the first run
# "time" is the index rather than a column.
if "time" in cycle_counts.columns:
    cycle_counts = (
        cycle_counts
        .assign(time=lambda frame: pd.to_datetime(frame["time"]))
        .set_index("time")
        .drop(columns=["location"])
        .resample("D")
        .sum()
        # Days without any readings end up as 0 counts. NOTE(review): the sum
        # should already yield 0 for empty days, so this is only a safeguard.
        .fillna(0)
    )

In [None]:
# Decompose the daily counts using a single seasonal period of 7 observations
# (one week of daily data), then plot the resulting components.
stl = MSTL(
    cycle_counts["count"],
    periods=7,
)
result = stl.fit()
result.plot();

## Exponential Smoothing

In [None]:
# Exponential smoothing with additive weekly seasonality and no trend.
hw_settings = dict(
    trend=None,
    damped_trend=False,
    seasonal="add",
    seasonal_periods=7,
)
model = ExponentialSmoothing(cycle_counts["count"], **hw_settings).fit()

# Predictions over the whole observed date range.
first_day = cycle_counts.index[0]
last_day = cycle_counts.index[-1]
y_hat = model.predict(start=first_day, end=last_day)

In [None]:
# Overlay the model's predictions on the observed daily counts.
fig, ax = plt.subplots()
for series, series_label in ((cycle_counts["count"], "Observed"), (y_hat, "Fitted")):
    ax.plot(series, label=series_label)
ax.set_title(LOCATION)
ax.set_ylabel("Count")
# Tilt the date labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.legend()
fig.tight_layout();

### Residuals

In [None]:
# Residual diagnostics in a 2x2 grid: histogram, residuals against observed
# values, residuals over time, and the residual autocorrelation function.
resid = cycle_counts["count"] - y_hat
observed_values = cycle_counts["count"].to_numpy()
resid_values = resid.to_numpy()

fig, axes = plt.subplots(2, 2, figsize=(12, 5.5))
ax_hist, ax_scatter, ax_time, ax_acf = axes.flatten()

# Distribution of the residuals.
ax_hist.hist(resid_values, bins=20)
ax_hist.set(ylabel="Frequency", xlabel="Residuals")

# Residuals against the observed counts, with a zero reference line.
ax_scatter.axhline(0, color="black", linestyle="--")
ax_scatter.scatter(observed_values, resid_values)
ax_scatter.set(xlabel="Observed", ylabel="Residuals")

# Residuals in time order.
ax_time.plot(cycle_counts.index, resid_values)
ax_time.set(ylabel="Residuals")
plt.setp(ax_time.get_xticklabels(), rotation=45)

# Autocorrelation of the residuals.
plot_acf(resid, ax=ax_acf)
ax_acf.set(title="", xlabel="Lag", ylabel="ACF")

fig.tight_layout();

## Cross-validation

In [None]:
class TimeSeriesSplitter:
    """Generate rolling validation windows anchored at the end of a series.

    The last window always ends at ``len(y)``; earlier windows are shifted
    back by ``gap`` positions each. Only the validation window bounds are
    produced, so the caller decides which observations to train on.

    Parameters
    ----------
    n_splits : int
        Number of validation windows (folds) to generate. Must be >= 1.
    window : int
        Number of observations in each validation window. Must be >= 1.
    gap : int
        Number of positions between the starts of consecutive windows
        (i.e. the stride between folds). Must be >= 0.

    Raises
    ------
    ValueError
        If any of the arguments is outside the ranges given above.
    """

    def __init__(self, n_splits: int, window: int, gap: int):
        if n_splits < 1:
            raise ValueError(f"n_splits must be >= 1, got {n_splits}")
        if window < 1:
            raise ValueError(f"window must be >= 1, got {window}")
        if gap < 0:
            raise ValueError(f"gap must be >= 0, got {gap}")
        self.n_splits = n_splits
        self.window = window
        self.gap = gap

    def split(self, y: pd.Series):
        """Yield ``(start_idx, end_idx)`` positions for each validation window.

        Parameters
        ----------
        y : pd.Series
            The series to split; only its length is used.

        Yields
        ------
        tuple of (int, int)
            Start (inclusive) and end (exclusive) positions of a validation
            window, in chronological order.

        Raises
        ------
        ValueError
            If ``y`` is too short to hold all the requested windows. Without
            this check the first start index would be negative, and slicing
            with it would silently count from the end of the series.
        """
        n = len(y)
        required = self.window + self.gap * (self.n_splits - 1)
        first_start = n - required
        if first_start < 0:
            raise ValueError(
                f"Series of length {n} is too short: {self.n_splits} splits "
                f"with window={self.window} and gap={self.gap} need at least "
                f"{required} observations."
            )
        for fold in range(self.n_splits):
            start_idx = first_start + fold * self.gap
            yield start_idx, start_idx + self.window

In [None]:
# Rolling-origin evaluation: for each fold, fit on everything before the
# validation window, forecast the window, and record the mean absolute error.
actuals = []
forecasts = []
maes = []

splitter = TimeSeriesSplitter(n_splits=5, window=14, gap=5)
for val_start, val_end in splitter.split(cycle_counts["count"]):
    # iloc slices are end-exclusive, so `[:val_start]` already stops right
    # before the validation window; subtracting 1 would drop the most recent
    # training observation and push every forecast one step further out.
    train = cycle_counts["count"].iloc[:val_start]
    val = cycle_counts["count"].iloc[val_start:val_end]

    # Fold-local names are used so the full-sample `model` / `y_hat` that the
    # residual diagnostics rely on are not overwritten by this loop.
    fold_fit = ExponentialSmoothing(
        train,
        trend=None,
        damped_trend=False,
        seasonal="add",
        seasonal_periods=7
    ).fit()
    fold_forecast = fold_fit.predict(start=val.index[0], end=val.index[-1])

    actuals.append(val)
    forecasts.append(fold_forecast)
    maes.append(np.mean(np.abs(val - fold_forecast)))

print(f"Mean MAE: {np.mean(maes)}")

In [None]:
# Visualise a single cross-validation fold (-1 selects the last one).
fold_idx = -1

fold_actual = actuals[fold_idx]
fold_forecast = forecasts[fold_idx]

# Up to 30 most recent observations ending at the first validation day
# (label-based slicing includes that day).
val_start = fold_actual.index[0]
recent_history = cycle_counts["count"].loc[:val_start].tail(30)

fig, ax = plt.subplots()
# The empty label keeps the history line out of the legend.
recent_history.plot(ax=ax, lw=2.5, color="black", label="")
fold_actual.plot(ax=ax, lw=2.5, color="blue", label="y_true")
fold_forecast.plot(ax=ax, lw=2.5, color="blue", ls="--", label="y_hat")
ax.legend(ncols=2, loc="upper left")
ax.set_xlabel("")
ax.set_ylabel("Counts")
# NOTE(review): fixed y-range; presumably tuned to this location's counts.
ax.set_ylim(100, 1100)
fig.tight_layout();

## Forecasting

In [None]:
# Hold out the last `n_test` days, fit on the rest, and simulate future paths
# to build empirical prediction intervals.
n_test = 21

# Seed for the simulation so that the trajectories (and therefore the quantile
# bands) are identical on every run of the notebook.
SIMULATION_SEED = 42

train = cycle_counts["count"].iloc[: -n_test]
val = cycle_counts["count"].iloc[-n_test:]

model = ExponentialSmoothing(
    train,
    trend=None,
    damped_trend=False,
    seasonal="add",
    seasonal_periods=7
)
model = model.fit()

# 100 simulated paths of length `n_test`, starting at the first held-out day.
trajectories = model.simulate(
    nsimulations=n_test,
    anchor=val.index[0],
    repetitions=100,
    error="add",
    random_state=SIMULATION_SEED,
)

# Row-wise quantiles across the simulated paths. The dict is built from the
# simulation-only frame before `assign` adds any column, so the new quantile
# columns never feed into one another.
trajectories = trajectories.assign(
    **{
        f"quantile_{q}": trajectories.quantile(q=q, axis=1)
        for q in [0.025, 0.05, 0.5, 0.95, 0.975]
    }
)

In [None]:
# Fan chart: recent history, simulated quantile bands, median path and the
# held-out observations.
fig, ax = plt.subplots()

# Last 50 training observations for context.
history = train.iloc[-50:]
ax.plot(history.index.values, history.values, lw=2.5, color="black")

horizon = trajectories.index.values

# Wider band first, then the narrower one drawn on top of it.
bands = (
    ("quantile_0.025", "quantile_0.975", 0.15),
    ("quantile_0.05", "quantile_0.95", 0.2),
)
for lower_col, upper_col, shade in bands:
    ax.fill_between(
        horizon,
        trajectories[lower_col].values,
        trajectories[upper_col].values,
        alpha=shade,
        color="blue",
    )

# Median of the simulated paths.
median_path = trajectories["quantile_0.5"].values
ax.plot(horizon, median_path, lw=2.5, color="blue", ls="--")

# Held-out observations.
ax.plot(val.index.values, val.values, lw=2.5, color="blue")
ax.set_ylabel("Counts")

fig.tight_layout();