In [37]:
# ─────────────────────────────────────────────────────
# ✅ FIX IMPORT PATH AND LOAD MODULES
# ─────────────────────────────────────────────────────
import sys
import os
import pandas as pd
from datetime import timedelta

# Add the root directory to Python path to access src/
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

# Now imports should work
import src.config as config
from src.inference import get_feature_store, load_model_from_registry, get_model_predictions


In [38]:
def fetch_hourly_rides(hours_back=12):
    fs = get_feature_store()
    fg = fs.get_feature_group(
        name=config.FEATURE_GROUP_NAME,
        version=config.FEATURE_GROUP_VERSION
    )

    # Read entire feature group (required in Python)
    df_all = fg.read()

    # Now get latest timestamp manually
    latest_hour = pd.to_datetime(df_all["pickup_hour"].max(), utc=True)
    current_hour = (latest_hour - timedelta(hours=hours_back)).floor('h')

    # Filter using a query
    query = fg.select_all()
    query = query.filter(fg.pickup_hour >= current_hour)

    df_result = query.read()
    print(f"✅ Found {len(df_result)} rows from {current_hour} to {latest_hour}")
    return df_result

df_actual = fetch_hourly_rides(hours_back=12)
df_actual.head()


2025-05-10 11:12:58,900 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 11:12:59,234 INFO: Initializing external client
2025-05-10 11:12:59,238 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-05-10 11:13:00,911 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215672
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.41s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.43s) 
✅ Found 54 rows from 2023-12-31 11:00:00+00:00 to 2023-12-31 23:00:00+00:00


Unnamed: 0,pickup_hour,pickup_location_id,rides
0,2023-12-31 14:00:00+00:00,6948.1,16
1,2023-12-31 17:00:00+00:00,6948.1,22
2,2023-12-31 09:00:00+00:00,5329.03,2
3,2023-12-31 08:00:00+00:00,5329.03,3
4,2023-12-31 13:00:00+00:00,6948.1,15


In [39]:
def fetch_recent_predictions(hours_back=12):
    fs = get_feature_store()
    fg = fs.get_feature_group(
        name=config.FEATURE_GROUP_MODEL_PREDICTION,  # "citi_bike_prediction"
        version=2
    )

    # Read to find latest available prediction hour
    df_all = fg.read()
    latest_hour = pd.to_datetime(df_all["pickup_hour"].max(), utc=True)
    current_hour = (latest_hour - timedelta(hours=hours_back)).floor('h')

    query = fg.select_all()
    query = query.filter(fg.pickup_hour >= current_hour)

    df_result = query.read()
    print(f"✅ Retrieved {len(df_result)} rows from {current_hour} to {latest_hour}")
    return df_result

df_pred = fetch_recent_predictions(12)
df_pred.head()


2025-05-10 11:13:06,246 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 11:13:06,293 INFO: Initializing external client
2025-05-10 11:13:06,297 INFO: Base URL: https://c.app.hopsworks.ai:443


2025-05-10 11:13:09,126 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215672
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.50s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.58s) 
✅ Retrieved 3 rows from 2023-12-31 12:00:00+00:00 to 2024-01-01 00:00:00+00:00


Unnamed: 0,pickup_location_id,pickup_hour,predicted_rides
0,6140.05,2024-01-01 00:00:00+00:00,3
1,5329.03,2024-01-01 00:00:00+00:00,4
2,6948.1,2024-01-01 00:00:00+00:00,9


In [40]:
# Shift predicted pickup_hour back to align with actuals
df_pred["pickup_hour"] = df_pred["pickup_hour"] - pd.Timedelta(hours=1)

# Ensure matching dtypes for merge keys
df_actual["pickup_location_id"] = df_actual["pickup_location_id"].astype(str)
df_pred["pickup_location_id"] = df_pred["pickup_location_id"].astype(str)

# Merge and compute absolute error
merged_df = pd.merge(
    df_actual,
    df_pred,
    on=["pickup_location_id", "pickup_hour"]
)
merged_df["absolute_error"] = abs(merged_df["predicted_rides"] - merged_df["rides"])
merged_df.head()


Unnamed: 0,pickup_hour,pickup_location_id,rides,predicted_rides,absolute_error
0,2023-12-31 23:00:00+00:00,6140.05,1,3,2
1,2023-12-31 23:00:00+00:00,5329.03,0,4,4
2,2023-12-31 23:00:00+00:00,6948.1,9,9,0


In [41]:
mae_by_hour = (
    merged_df
    .groupby('pickup_hour')['absolute_error']
    .mean()
    .reset_index()
    .rename(columns={'absolute_error': 'MAE'})
)
mae_by_hour.head()


Unnamed: 0,pickup_hour,MAE
0,2023-12-31 23:00:00+00:00,2.0


In [42]:
import plotly.express as px

fig = px.line(
    mae_by_hour,
    x='pickup_hour',
    y='MAE',
    title='Mean Absolute Error (MAE) by Pickup Hour',
    labels={'pickup_hour': 'Pickup Hour', 'MAE': 'Mean Absolute Error'},
    markers=True
)

fig.show()


In [43]:
print("🔢 Average MAE across hours:", mae_by_hour["MAE"].mean())


🔢 Average MAE across hours: 2.0


In [44]:
print("🔍 FG name:", config.FEATURE_GROUP_MODEL_PREDICTION)
print("🔍 FG version:", config.FEATURE_GROUP_MODEL_PREDICTION_VERSION)


🔍 FG name: citi_bike_prediction
🔍 FG version: 2
