In [None]:
Forecast

In [None]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# --- Notebook configuration: everything a reader might want to tune ---

# Input file and the name of its date column.
csv_path = "transport_data.csv"
date_col = "Date"

# Columns forecast independently, one model per column.
features = [
    "Local Route",
    "Light Rail",
    "Peak Service",
    "Rapid Route",
    "School",
    "Other",
]

# Number of daily steps to predict past the last date in the file.
forecast_days = 7

# When True, models are fitted on log1p(y) and predictions are mapped back with expm1.
use_log = True

In [None]:
# Load the raw CSV, asking pandas to parse the date column.
df = pd.read_csv(csv_path, parse_dates=[date_col])

# read_csv does not raise when a value cannot be parsed as a date: it returns
# the column unaltered as object dtype. Fail here with a clear message rather
# than later, after models have already been fitted.
if not pd.api.types.is_datetime64_any_dtype(df[date_col]):
    raise ValueError(
        f"Column {date_col!r} in {csv_path!r} could not be parsed as dates "
        f"(dtype is {df[date_col].dtype}); check the date format of the file."
    )

# Surface missing feature columns up front instead of part-way through the
# per-feature loop.
missing_cols = [col for col in features if col not in df.columns]
if missing_cols:
    raise KeyError(f"Columns missing from {csv_path!r}: {missing_cols}")

# Chronological order with a clean 0..n-1 index; the forecast row numbering
# in the following cell continues from this index.
df = df.sort_values(date_col).reset_index(drop=True)

In [None]:
# Forecast rows are labelled by continuing the integer index of the history:
# the first forecast row gets the position right after the last loaded row.
last_idx = df.index.max()
start_idx = int(last_idx + 1)
index_vals = [start_idx + step for step in range(forecast_days)]

In [None]:
# Accumulators filled by the per-feature loop:
#   feat_forecasts -> feature name mapped to its array of forecast values
#   eval_list      -> one metrics record (dict) per feature
feat_forecasts, eval_list = {}, []

In [None]:
# Start from empty accumulators so that re-running this cell replaces the
# previous results instead of appending a second copy of every metrics row.
feat_forecasts = {}
eval_list = []

# Fit one Prophet model per feature, store its forecast for the next
# `forecast_days` days, and score the fit on the historical rows (in-sample).
for feat in features:
    print(f"\nProcessing feature: {feat}")

    # Observed values in their original units; used for scoring further down.
    orig = pd.DataFrame({"ds": df[date_col], "y_orig": df[feat].astype(float)})

    # Training frame with the `ds` / `y` column names passed to Prophet.
    # When `use_log` is set the model is trained on log1p(y).
    data = pd.DataFrame({"ds": orig["ds"], "y": orig["y_orig"]})
    if use_log:
        data["y"] = np.log1p(data["y"])

    m = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False)
    m.fit(data)

    future = m.make_future_dataframe(periods=forecast_days, freq="D")
    forecast = m.predict(future)[["ds", "yhat"]].copy()

    # Undo the training transform once, so that both the stored forecast and
    # the metrics below are expressed in the original units.
    if use_log:
        forecast["yhat"] = np.expm1(forecast["yhat"])

    # The last `forecast_days` rows are the future steps appended by
    # make_future_dataframe. Stored before scoring so that a feature with no
    # scorable rows still gets its forecast.
    feat_forecasts[feat] = forecast["yhat"].tail(forecast_days).to_numpy()

    # Predictions for dates up to the last training date, matched to the
    # observations by date; rows lacking either value are left out of the score.
    hist_pred = forecast[forecast["ds"] <= data["ds"].max()]
    merged = orig.merge(hist_pred, on="ds", how="left").dropna(subset=["y_orig", "yhat"])
    n_rows = merged.shape[0]
    if n_rows == 0:
        print(f"  Warning: no overlapping predictions for {feat}; skipping metrics.")
        eval_list.append({"feature": feat, "MAE": np.nan, "RMSE": np.nan, "n": 0})
        continue

    y_true = merged["y_orig"].astype(float)
    y_pred = merged["yhat"].astype(float)

    mae = mean_absolute_error(y_true, y_pred)
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))

    eval_list.append({"feature": feat, "MAE": mae, "RMSE": rmse, "n": n_rows})
    print(f"  Eval rows: {n_rows}  MAE: {mae:.4f}  RMSE: {rmse:.4f}")
    


In [None]:
# One column per feature, one row per forecast step, labelled with the
# continued integer index computed earlier.
wide = pd.DataFrame(data=feat_forecasts, index=index_vals)

In [None]:
# Display settings for the text tables printed below.
display_options = {
    "display.precision": 6,
    "display.width": 140,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)


In [None]:
# Header followed by the full forecast table as plain text.
print("\n\nForecasted Data for All Columns:\n", wide.to_string(), sep="\n")

In [None]:
# Build the metrics table with a fixed column order. Passing `columns=` to the
# constructor (rather than selecting columns afterwards) also yields an empty,
# correctly labelled table when `eval_list` is empty, where the column
# selection would raise KeyError.
eval_df = pd.DataFrame(eval_list, columns=["feature", "MAE", "RMSE", "n"])
print("\n\nEvaluation Metrics (in-sample):\n")
print(eval_df.to_string(index=False))