In [1]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [3]:
# --- Run configuration: everything a reader might want to tune lives here ---

# Input CSV and the name of its date column.
csv_path = "transport_data.csv"
date_col = "Date"

# Columns that each get their own independent Prophet model.
features = [
    "Local Route",
    "Light Rail",
    "Peak Service",
    "Rapid Route",
    "School",
    "Other",
]

# Number of daily steps to forecast past the last date in the data.
forecast_days = 7

# When True, models are fitted on log1p(y) and predictions are mapped back with expm1.
use_log = True

In [None]:
# Load the raw table, parse the date column, and put the rows in chronological
# order with a fresh 0..n-1 index (later cells derive the forecast row labels
# from this index).
df = (
    pd.read_csv(csv_path, parse_dates=[date_col])
    .sort_values(date_col)
    .reset_index(drop=True)
)

In [5]:
# Row labels for the forecast table: continue the integer index of `df`
# for `forecast_days` additional rows.
last_idx = df.index.max()
start_idx = int(last_idx + 1)
index_vals = [start_idx + offset for offset in range(forecast_days)]

In [6]:
# Accumulators filled by the per-feature modelling loop:
#   feat_forecasts -> feature name -> array of forecasted values
#   eval_list      -> one metrics record (dict) per feature
feat_forecasts, eval_list = {}, []

In [13]:
def _to_original_scale(values):
    """Map model-space predictions back to the scale of the raw series.

    With ``use_log`` enabled the models are fitted on ``log1p(y)``, so the
    inverse is ``expm1``. ``expm1`` returns values in (-1, 0) whenever the
    model predicts below zero in log space; those are floored at 0 so that
    neither the forecasts nor the error metrics contain negative values.
    NOTE(review): the floor assumes the series are non-negative counts --
    confirm against the data. With ``use_log`` disabled the values are
    returned unchanged.

    Parameters
    ----------
    values : array-like of float
        Predictions in the space the model was fitted in.

    Returns
    -------
    numpy.ndarray
        Predictions on the original scale.
    """
    values = np.asarray(values, dtype=float)
    if not use_log:
        return values
    return np.clip(np.expm1(values), 0.0, None)


# Fit one Prophet model per feature, store its `forecast_days`-step forecast in
# `feat_forecasts`, and append in-sample MAE/RMSE (original scale) to `eval_list`.
for feat in features:
    print(f"\nProcessing feature: {feat}")

    # Observations on the original scale; kept apart from the training frame
    # because the latter may be log-transformed.
    orig = pd.DataFrame({"ds": df[date_col], "y_orig": df[feat].astype(float)})

    # Prophet expects the columns to be named "ds" (dates) and "y" (target).
    data = pd.DataFrame({"ds": orig["ds"], "y": orig["y_orig"]})
    if use_log:
        data["y"] = np.log1p(data["y"])

    m = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False)
    m.fit(data)

    # History plus `forecast_days` extra daily rows appended after the last date.
    future = m.make_future_dataframe(periods=forecast_days, freq="D")
    forecast = m.predict(future)[["ds", "yhat", "yhat_lower", "yhat_upper"]]

    # The trailing `forecast_days` rows are the out-of-sample horizon.
    future_preds = forecast.tail(forecast_days)
    vals = _to_original_scale(future_preds["yhat"])
    feat_forecasts[feat] = vals

    # In-sample evaluation: align fitted values with the observations by date and
    # drop rows where either side is missing.
    hist_pred = forecast.loc[forecast["ds"] <= data["ds"].max(), ["ds", "yhat"]]
    merged = orig.merge(hist_pred, on="ds", how="left").dropna(subset=["y_orig", "yhat"])
    if merged.shape[0] == 0:
        print(f"  Warning: no overlapping predictions for {feat}; skipping metrics.")
        eval_list.append({"feature": feat, "MAE": np.nan, "RMSE": np.nan, "n": 0})
        continue

    # Both sides are compared on the original scale, whatever `use_log` is.
    y_true = merged["y_orig"].astype(float)
    y_pred = _to_original_scale(merged["yhat"])

    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = float(np.sqrt(mse))

    eval_list.append({"feature": feat, "MAE": mae, "RMSE": rmse, "n": merged.shape[0]})
    print(f"  Eval rows: {merged.shape[0]}  MAE: {mae:.4f}  RMSE: {rmse:.4f}")
    



Processing feature: Local Route


11:24:46 - cmdstanpy - INFO - Chain [1] start processing
11:24:46 - cmdstanpy - INFO - Chain [1] done processing


  Eval rows: 1918  MAE: 4456.9857  RMSE: 5525.9365

Processing feature: Light Rail


11:24:48 - cmdstanpy - INFO - Chain [1] start processing
11:24:48 - cmdstanpy - INFO - Chain [1] done processing


  Eval rows: 1918  MAE: 2337.5728  RMSE: 2929.8652

Processing feature: Peak Service


11:24:50 - cmdstanpy - INFO - Chain [1] start processing
11:24:50 - cmdstanpy - INFO - Chain [1] done processing


  Eval rows: 1918  MAE: 134.0347  RMSE: 175.1960

Processing feature: Rapid Route


11:24:51 - cmdstanpy - INFO - Chain [1] start processing
11:24:51 - cmdstanpy - INFO - Chain [1] done processing


  Eval rows: 1918  MAE: 4633.6261  RMSE: 5901.3880

Processing feature: School


11:24:53 - cmdstanpy - INFO - Chain [1] start processing
11:24:53 - cmdstanpy - INFO - Chain [1] done processing


  Eval rows: 1918  MAE: 2178.4161  RMSE: 3116.2943

Processing feature: Other


11:24:54 - cmdstanpy - INFO - Chain [1] start processing
11:24:55 - cmdstanpy - INFO - Chain [1] done processing


  Eval rows: 1898  MAE: 20.6349  RMSE: 37.2364


In [8]:
# One column per feature, one row per forecast step, labelled with the
# continuation of the source row index.
wide = pd.DataFrame(
    data=feat_forecasts,
    index=index_vals,
)

In [9]:
# Display settings for the printed tables: 6 decimal places, 140-character width.
pd.options.display.precision = 6
pd.options.display.width = 140


In [10]:
# Heading followed by the full forecast table as plain text.
print("\n\nForecasted Data for All Columns:\n", wide.to_string(), sep="\n")



Forecasted Data for All Columns:

      Local Route   Light Rail  Peak Service   Rapid Route      School      Other
1918  9361.871158  6806.741508    149.095414  11009.606260  123.988159  46.413074
1919  9139.726231  6389.874937    159.567493  10599.417299   97.591889  47.991300
1920  8611.645853  6299.336909    131.241489  10106.686239   72.537886  44.051186
1921  7484.269918  5872.524834     84.575772   9019.121105   41.827149  41.312344
1922  3392.416454  4246.422919      3.552445   5563.069451    0.226679  23.961049
1923  2556.534221  3380.500242      3.213659   4526.586549    0.123868  24.425913
1924  6273.310741  5166.018795     64.856418   7806.936112   28.603480  33.584234


In [11]:
# Collect the per-feature metric records into a table with a fixed column order,
# then print it without the row index.
eval_df = pd.DataFrame(eval_list).loc[:, ["feature", "MAE", "RMSE", "n"]]
print("\n\nEvaluation Metrics (in-sample):\n", eval_df.to_string(index=False), sep="\n")



Evaluation Metrics (in-sample):

     feature         MAE        RMSE    n
 Local Route 4456.985712 5525.936503 1918
  Light Rail 2337.572785 2929.865250 1918
Peak Service  134.034692  175.196017 1918
 Rapid Route 4633.626064 5901.388030 1918
      School 2178.416133 3116.294316 1918
       Other   20.634900   37.236414 1898
