In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import AutoDateLocator, AutoDateFormatter

from scipy.optimize import minimize

from prophet import Prophet
from prophet.plot import add_changepoints_to_plot

# Prophet

In [None]:
# Counter site to analyse: used to select rows by the "location" column and as the title of the overview plot.
LOCATION = "Nelson Street"

In [None]:
# Load the raw counts and keep only the rows for the selected location.
cycle_counts_path = "../cycle_counts.csv"
cycle_counts = pd.read_csv(cycle_counts_path, parse_dates=["date"])
# Take an explicit copy of the filtered rows: a later cell assigns to a column
# of this frame, and doing that on a boolean-mask slice of the parent frame
# raises pandas' SettingWithCopyWarning.
cycle_counts = cycle_counts[cycle_counts["location"] == LOCATION].copy()

In [None]:
# Overview of the raw counts for the selected location.
dates, counts = cycle_counts["date"], cycle_counts["count"]

fig, ax = plt.subplots()
ax.plot(dates, counts, lw=1.5)
ax.set_title(LOCATION)
ax.set_ylabel("Count")
# Tilt the date labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout();

In [None]:
# Build a regular daily series indexed by date.
# Written as one non-mutating chain: `.assign` returns a new frame instead of
# writing a column into the location-filtered frame, which avoids pandas'
# SettingWithCopyWarning for chained assignment.
cycle_counts = (
    cycle_counts
    .assign(date=lambda df: pd.to_datetime(df["date"]))
    .set_index("date")
    .drop(columns=["location"])
    .resample("D")
    .sum()
    # With pandas' default `min_count=0`, days without any rows already sum
    # to 0; the fillna is kept as a guard so the series never contains NaN.
    .fillna(0)
)

## Fourier Series

In [None]:
# Discrete Fourier transform of the daily counts.
y = np.array(cycle_counts["count"])
n_samples = len(y)

x_fft = np.fft.fftfreq(n_samples, d=1)  # sample spacing of 1 day -> frequencies in 1 / day
y_fft = np.fft.fft(y)

# The spectrum of a real-valued series is mirrored, so only the first half
# (the non-negative frequencies) is drawn.
half = n_samples // 2
magnitude = np.abs(y_fft)[:half]

fig, ax = plt.subplots()
ax.plot(x_fft[:half], 1 / n_samples * magnitude, lw=2)
# Dashed guides at the weekly frequency (1/7) and its 2nd and 3rd harmonics.
for harmonic in range(1, 4):
    ax.axvline(harmonic / 7, color="grey", ls="--")
ax.set_xlabel("Freq (1 / day)")
ax.set_ylabel("Magnitude")
fig.tight_layout();

In [None]:
# Seasonality as a partial Fourier sum

y = np.array(cycle_counts["count"])
n_samples = len(y)

period = 7  # length of the seasonal cycle in samples (days)
ks = np.array([1, 2, 3])  # harmonics of the base frequency 1 / period
ts = np.arange(n_samples).reshape(-1, 1)  # column vector so it broadcasts against ks

# Design matrices of shape (n_samples, len(ks)): one column per harmonic.
X_cos = np.cos(2 * np.pi * ks * ts / period)
X_sin = np.sin(2 * np.pi * ks * ts / period)


def fourier_sum(theta: np.ndarray) -> np.ndarray:
    """Evaluate the partial Fourier sum at every sample index.

    Parameters
    ----------
    theta
        Coefficients ordered as ``[a0, a1, b1, a2, b2, ...]``: the constant
        term followed by one (cosine, sine) pair per harmonic in ``ks``.

    Returns
    -------
    np.ndarray
        Array of length ``n_samples`` with the value of the sum at each step.

    Raises
    ------
    ValueError
        If ``theta`` does not hold exactly ``1 + 2 * len(ks)`` coefficients.
    """
    theta = np.asarray(theta, dtype=float)
    n_expected = 1 + 2 * len(ks)
    if theta.shape != (n_expected,):
        raise ValueError(
            f"expected {n_expected} coefficients, got shape {theta.shape}"
        )
    # Slicing instead of a fixed 7-way unpack keeps this in step with `ks`.
    a0 = theta[0]
    theta_cos = theta[1::2]  # a1, a2, ...
    theta_sin = theta[2::2]  # b1, b2, ...
    return a0 + np.dot(X_cos, theta_cos) + np.dot(X_sin, theta_sin)


def obj(x: np.ndarray) -> float:
    """Mean squared error between the observed counts and the Fourier sum."""
    y_hat = fourier_sum(x)
    return np.mean((y - y_hat) ** 2)


# Same starting point as before (1 for the constant, 0.5 for every harmonic
# coefficient), but sized from `ks` so changing the harmonics needs no other edit.
x0 = np.array([1.0] + [0.5] * (2 * len(ks)))
result = minimize(obj, x0=x0)
if not result.success:
    # Surface optimiser problems instead of silently plotting a poor fit.
    print(f"Fourier fit did not converge: {result.message}")
y_hat = fourier_sum(result.x)

In [None]:
# Compare the observed counts with the fitted Fourier sum over the most recent samples.
n_samples_to_plot = 150
recent = slice(-n_samples_to_plot, None)

fig, ax = plt.subplots()
for series, name in ((y, "Observed"), (y_hat, "Fitted")):
    ax.plot(series[recent], label=name, lw=2)
ax.set_ylabel("Count")
ax.legend()
fig.tight_layout();

## Prophet Model

In [None]:
# Move the date index back to a column and rename to the "ds" / "y" column names used by the Prophet cells below.
prophet_columns = {"date": "ds", "count": "y"}
cycle_counts = cycle_counts.reset_index().rename(columns=prophet_columns)

# Hold out the final four weeks for validation; everything before is training data.
n_test = 4 * 7
split_at = len(cycle_counts) - n_test
train, val = cycle_counts.iloc[:split_at], cycle_counts.iloc[split_at:]

In [None]:
# Configure the Prophet model and fit it on the training split in one step.
model = Prophet(
    growth="linear",
    # Trend changepoint sparsity: lower values mean more regularization.
    changepoint_prior_scale=0.05,
    yearly_seasonality="auto",
    # Number of weekly Fourier terms (3 is also the default).
    weekly_seasonality=3,
).fit(train)

### Investigate the fit

In [None]:
# In-sample predictions over the training dates, with the trend changepoints overlaid.

fitted_values = model.predict(train.loc[:, ["ds"]])
fig = model.plot(fitted_values)
fit_ax = fig.gca()
add_changepoints_to_plot(fit_ax, model, fitted_values);

In [None]:
# Break the in-sample fit down into the model's individual components.
fig = model.plot_components(fcst=fitted_values)

### Forecast

In [None]:
# Forecast the held-out validation dates and compare with the observed counts.
forecasts = model.predict(val[["ds"]])

n_history = 4 * 7  # days of training history drawn for context
history = train.iloc[-n_history:]
forecast_color = "#1f77b4"

fig, ax = plt.subplots()
ax.plot(history["ds"].values, history["y"].values, lw=2, color="black", label="Train")
ax.plot(val["ds"].values, val["y"].values, lw=2, color=forecast_color, label="Validation")
# Plot every forecast row. `forecasts` has one row per validation date, so
# slicing yhat with a fixed window would only make x and y lengths disagree
# (and ax.plot fail) whenever the validation size is not that window.
ax.plot(
    forecasts["ds"].values,
    forecasts["yhat"].values,
    lw=2,
    ls="--",
    color=forecast_color,
    label="Forecast",
)
ax.fill_between(
    forecasts["ds"].values,
    forecasts["yhat_lower"].values,
    forecasts["yhat_upper"].values,
    color="#0072B2",
    alpha=0.2,
    label="Uncertainty interval",
)

locator = AutoDateLocator(interval_multiples=False)
formatter = AutoDateFormatter(locator)
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)
ax.grid(True, which="major", c="gray", ls="-", lw=1, alpha=0.2)
ax.set(ylabel="Counts")
# Validation and forecast share a colour, so a legend is needed to tell them apart.
ax.legend()

for tick in ax.get_xticklabels():
    tick.set_rotation(45)
fig.tight_layout();