In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import kpss, adfuller

# SARIMA

In [None]:
# Counter site analysed in this notebook; used to filter the "location"
# column of the loaded data and as the title of the raw-series plot.
LOCATION = "Nelson St"

In [None]:
# Load every counter reading, parsing the "time" column as datetimes.
all_counts = pd.read_csv("cycle_counts.csv", parse_dates=["time"])

# Keep only the rows for the site under study. The explicit .copy() makes
# `cycle_counts` an independent frame, so the column assignment performed in a
# later cell does not operate on a slice of the full table (which would raise
# pandas' SettingWithCopyWarning).
cycle_counts = all_counts.loc[all_counts["location"] == LOCATION].copy()

In [None]:
# Plot the readings for the selected site as loaded, i.e. while "time" is
# still a regular column of the frame.
fig, ax = plt.subplots()
ax.plot(cycle_counts["time"], cycle_counts["count"], lw=1.5)
ax.set(title=LOCATION, ylabel="Count")

# Rotate the date labels through the axis' tick settings instead of mutating
# the label objects that happen to exist right now; this way the rotation also
# applies to any tick labels matplotlib creates when the figure is drawn.
ax.tick_params(axis="x", labelrotation=45)

fig.tight_layout();

In [None]:
# Turn the site's readings into a regular daily series indexed by time.
#
# The whole transformation is one non-mutating chain: assign() returns a new
# frame, so nothing is written into the filtered frame in place (the original
# column assignment could raise SettingWithCopyWarning).
#
# NOTE: this cell consumes the "time" and "location" columns, so it cannot be
# re-run on its own output; re-run the loading cell first.
cycle_counts = (
    cycle_counts
    .assign(time=pd.to_datetime(cycle_counts["time"]))  # ensure datetime dtype
    .set_index("time")
    .drop(columns=["location"])
    .resample("D")   # one row per calendar day
    .sum()           # total of all readings falling on that day
    .fillna(0)       # guard: treat any remaining gaps as zero counts
)

## Autocorrelation

Reference: [Stationarity and detrending (ADF/KPSS) — statsmodels documentation](https://www.statsmodels.org/stable/examples/notebooks/generated/stationarity_detrending_adf_kpss.html)

In [None]:
# Side-by-side correlograms of the undifferenced counts:
# autocorrelation on the left, partial autocorrelation on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 3.5), sharey=True)

for panel, draw_correlogram in zip(ax, (plot_acf, plot_pacf)):
    draw_correlogram(cycle_counts["count"], ax=panel)
    panel.set(xlabel="Lag")

fig.tight_layout();

In [None]:
# KPSS test on the undifferenced counts, using statsmodels' default settings.
# NOTE(review): per the statsmodels documentation the null hypothesis of KPSS
# is stationarity, so a small p-value argues for differencing — confirm when
# interpreting the output.
kpss(cycle_counts["count"])

### Differencing 

In [None]:
# Compare two candidate transformations of the count series on one set of axes.

# Lag-7 difference: each value minus the value seven rows earlier.
weekly_diff = cycle_counts["count"].diff(7).dropna()

# Lag-1 difference applied on top of the lag-7 difference. The name
# `seasonal_diff` (and its leading NaN) is kept so the variable this cell
# leaves behind is unchanged.
seasonal_diff = weekly_diff.diff(1)

# Draw on an explicit Axes rather than relying on pyplot's "current axes":
# both series end up on the same plot either way, but here they are labelled
# and cannot accidentally land on a figure left over from an earlier cell.
fig, ax = plt.subplots()
weekly_diff.plot(ax=ax, label="Lag-7 difference")
seasonal_diff.dropna().plot(ax=ax, label="Lag-7 then lag-1 difference")
ax.set(title=LOCATION, ylabel="Differenced count")
ax.legend()
fig.tight_layout();

In [None]:
# Correlograms of the lag-7 differenced counts.
# `diffs` is left bound for the KPSS test in the following cell.
diffs = cycle_counts["count"].diff(7).dropna()

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 3.5), sharey=True)

# Left panel: autocorrelation; right panel: partial autocorrelation.
for panel, draw_correlogram in zip(ax, (plot_acf, plot_pacf)):
    draw_correlogram(diffs, ax=panel)
    panel.set(xlabel="Lag")

fig.tight_layout();

In [None]:
# KPSS test on the lag-7 differenced counts only.
# The series is recomputed here instead of being read from `diffs`, because a
# later cell rebinds `diffs` to the lag-7-then-lag-1 difference; running the
# cells out of order would otherwise silently test the wrong series.
# The Series is passed directly, matching the other kpss() calls in this
# notebook.
kpss(cycle_counts["count"].diff(7).dropna())

In [None]:
# Correlograms of the counts after a lag-7 difference followed by a lag-1
# difference. This rebinds `diffs`, which the cells below read.
lag7_diff = cycle_counts["count"].diff(7).dropna()
diffs = lag7_diff.diff(1).dropna()

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 3.5), sharey=True)

# Left panel: autocorrelation; right panel: partial autocorrelation.
for panel, draw_correlogram in zip(ax, (plot_acf, plot_pacf)):
    draw_correlogram(diffs, ax=panel)
    panel.set(xlabel="Lag")

fig.tight_layout();

In [None]:
# Display the series currently bound to `diffs` (the lag-7 then lag-1
# differenced counts when the notebook is run top to bottom).
diffs

In [None]:
# KPSS test on `diffs`, which at this point in a top-to-bottom run holds the
# lag-7 then lag-1 differenced counts.
# NOTE(review): `diffs` is also assigned by an earlier cell with a different
# meaning, so this result depends on cell execution order.
kpss(diffs)