In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt

from prophet import Prophet

from bikes.evaluate.split import train_test_split, TEST_PERIOD

In [None]:
def plot_timeseries(actual: pd.Series, predicted: pd.Series, ax=None) -> "plt.Axes":
    """Plot an observed and a predicted series together on one axis.

    Parameters
    ----------
    actual : pd.Series
        Observed values; drawn with the legend label "Observed".
    predicted : pd.Series
        Predicted values; drawn with the legend label "Predicted".
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. When omitted, a new figure and axes are created
        and the figure's layout is tightened before returning.

    Returns
    -------
    matplotlib.axes.Axes
        The axes containing both lines, so callers can customise further.
    """
    fig = None
    if ax is None:
        fig, ax = plt.subplots()

    ax.plot(actual, label="Observed")
    ax.plot(predicted, label="Predicted")

    ax.set(ylabel="Count")
    # Set the rotation on the axis itself rather than on the current label objects.
    ax.tick_params(axis="x", labelrotation=45)
    ax.legend()

    # Only adjust the layout of a figure this function created; a caller
    # passing its own axes stays in control of that figure's layout.
    if fig is not None:
        fig.tight_layout()

    return ax

## Prophet

In [None]:
# Load the raw counts, parsing the "date" column to datetimes while reading.
cycle_counts = pd.read_csv(
    "cycle_counts.csv",
    parse_dates=["date"],
)

In [None]:
LOCATION = "Quay Street Eco Display Classic"

# Keep only the rows for the chosen location, as an independent copy
# indexed by date in ascending order.
location_df = (
    cycle_counts
    .loc[cycle_counts["location"] == LOCATION]
    .copy()
    .set_index("date")
    .sort_index()
)

# Split with the project helper, then pull out the target column of each part.
train_df, test_df = train_test_split(location_df)
y_train = train_df["count"]
y_test = test_df["count"]

In [None]:
# Quick look at the last 500 observations of the training series.
fig, ax = plt.subplots()
ax.plot(y_train.iloc[-500:], label="Observed")
ax.set_ylabel("Count")
for tick in ax.get_xticklabels():
    tick.set(rotation=45)
ax.legend()
fig.tight_layout();

In [None]:
# Reshape the training frame for Prophet: the date index becomes a "ds"
# column, the target becomes "y", and the location label is dropped.
prophet_train_df = (
    train_df
    .reset_index()
    .drop(columns=["location"])
    .rename(columns={"date": "ds", "count": "y"})
)

# fit() returns the fitted model itself, so this is the same object either way.
prophet = Prophet()
prophet.fit(prophet_train_df)

# Daily dates covering the test period; inclusive="left" leaves out test_end.
test_start, test_end = TEST_PERIOD
test_dates = pd.date_range(
    start=test_start,
    end=test_end,
    freq="D",
    inclusive="left",
)

# Keep only the point forecast, indexed by date and given a model-specific name.
forecasts = (
    prophet.predict(pd.DataFrame({"ds": test_dates}))
    .loc[:, ["ds", "yhat"]]
    .set_index("ds")
    .rename(columns={"yhat": "yhat_prophet"})
)

In [None]:
# Compare held-out observations with the Prophet forecasts.
# The trailing semicolon stops the cell from echoing the returned Axes repr.
plot_timeseries(y_test, forecasts["yhat_prophet"]);

In [None]:
# Build the table that will be saved: each test row (observed count renamed
# to "ytrue") with the forecast for the same date attached. The left merge
# keeps every test row; the date index is then turned back into a column.
forecast_df = (
    test_df
    .rename(columns={"count": "ytrue"})
    .merge(forecasts, how="left", left_index=True, right_index=True)
    .reset_index()
)

In [None]:
# The dates come from test_df's own index, so this only guards against
# missing dates in the input data.
assert not forecast_df["date"].isna().any()
# The left merge keeps every test row; a row whose date has no forecast gets
# NaN in "yhat_prophet", so check that column before anything is saved.
assert not forecast_df["yhat_prophet"].isna().any(), "some test dates have no Prophet forecast"

In [None]:
# Write the actuals and forecasts for this location; the file name is the
# location lower-cased with spaces replaced by underscores.
output_path = Path("./forecasts/prophet") / f"{LOCATION.replace(' ', '_').lower()}.csv"
# to_csv does not create missing directories, so make sure the target exists.
output_path.parent.mkdir(parents=True, exist_ok=True)
# NOTE(review): the default index=True also writes the RangeIndex left by
# reset_index() as an unnamed first column; kept unchanged in case existing
# readers of these files rely on it.
forecast_df.to_csv(output_path)