In [19]:
import os
import json
import joblib
import numpy as np
import pandas as pd
from datetime import datetime, timezone

import hopsworks


In [20]:
# ---------------------------------------------------------------
# Hopsworks connection settings (each is None when the variable is unset)
# ---------------------------------------------------------------
PROJECT_NAME = os.getenv("HOPSWORKS_PROJECT_NAME")
HOPSWORKS_API_KEY = os.getenv("HOPSWORKS_API_KEY")

# ---------------------------------------------------------------
# Feature groups: name and version of each source
# ---------------------------------------------------------------
TRAFFIC_FG_NAME = "traffic_temporal_fg"
TRAFFIC_FG_VER = 1

WEATHER_FG_NAME = "weather_10m_fg"
WEATHER_FG_VER = 1

TFL_FG_NAME = "tfl_disruptions_10m_fg"
TFL_FG_VER = 1

# ---------------------------------------------------------------
# Column names
# ---------------------------------------------------------------
POINT_ID_COL = "point_id"

# 10-minute bucket column. Frames that lack it get it derived from the first
# raw timestamp column found among RAW_TS_CANDIDATES (see ensure_ts_10m).
TS_10M_COL = "ts_10m"
RAW_TS_CANDIDATES = ["timestamp_utc", "timestamp", "datetime", "time"]

# ---------------------------------------------------------------
# Model
# ---------------------------------------------------------------
MODEL_NAME = "traffic_speed_ratio_keras"
MODEL_VERSION = None  # None = no pinned version; pick_model_from_registry decides

# Names of the two prediction columns written to the output
PRED_COL_30 = "pred_speed_ratio_t_30"
PRED_COL_60 = "pred_speed_ratio_t_60"

# ---------------------------------------------------------------
# Output
# ---------------------------------------------------------------
N_POINTS = 50  # upper bound on rows kept by latest_per_point
OUT_JSON_PATH = "data/predictions/predictions_latest.json"

In [21]:
# 1. Login to Hopsworks
# PROJECT_NAME and HOPSWORKS_API_KEY are read from the environment in the config cell.
# When HOPSWORKS_PROJECT_NAME is unset (or empty) the previously hard-coded project is
# used, so existing setups keep working. An unset/empty API key is passed as None, which
# leaves credential resolution to hopsworks.login, exactly as before.
project = hopsworks.login(
    host="eu-west.cloud.hopsworks.ai",
    project=PROJECT_NAME or "London_traffic",
    api_key_value=HOPSWORKS_API_KEY or None,
)
fs = project.get_feature_store()  # feature store handle used by every read/write below
mr = project.get_model_registry()  # model registry handle used to fetch the model

2026-01-11 12:03:20,553 INFO: Closing external client and cleaning up certificates.
2026-01-11 12:03:20,556 INFO: Connection closed.
2026-01-11 12:03:20,558 INFO: Initializing external client
2026-01-11 12:03:20,559 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2026-01-11 12:03:21,256 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3209


In [22]:
def find_timestamp_col(df: pd.DataFrame, candidates=RAW_TS_CANDIDATES) -> str | None:
    """Return the first name in ``candidates`` that is a column of ``df``, else ``None``.

    Candidates are tried in the order given, so earlier names take priority.
    """
    present = df.columns
    return next((name for name in candidates if name in present), None)

def ensure_ts_10m(df: pd.DataFrame, ts_10m_col: str = TS_10M_COL) -> pd.DataFrame:
    """Return a copy of ``df`` that has a UTC datetime column named ``ts_10m_col``.

    * If the column already exists, it is only re-parsed as UTC datetimes.
    * Otherwise the first raw timestamp column reported by ``find_timestamp_col`` is
      parsed as UTC (in the copy) and floored to 10 minutes to create the column.

    Values that cannot be parsed become ``NaT``. The input frame is not modified.

    Raises:
        ValueError: if neither ``ts_10m_col`` nor any raw timestamp candidate is present.
    """
    out = df.copy()

    def _as_utc(series):
        # errors="coerce": unparseable entries turn into NaT instead of raising
        return pd.to_datetime(series, utc=True, errors="coerce")

    if ts_10m_col not in out.columns:
        raw_col = find_timestamp_col(out)
        if raw_col is None:
            raise ValueError(
                f"No timestamp column found. Looked for {RAW_TS_CANDIDATES}. Available={list(out.columns)}"
            )
        out[raw_col] = _as_utc(out[raw_col])
        out[ts_10m_col] = out[raw_col].dt.floor("10min")
    else:
        out[ts_10m_col] = _as_utc(out[ts_10m_col])

    return out

def read_fg(fs, name: str, version: int) -> pd.DataFrame:
    """Look up feature group ``name`` / ``version`` on ``fs`` and return what its ``read()`` gives."""
    return fs.get_feature_group(name=name, version=version).read()

def join_two(left: pd.DataFrame, right: pd.DataFrame, right_name: str) -> pd.DataFrame:
    """Left-join ``right`` onto ``left`` and print the shape of the result.

    The join key is ``[POINT_ID_COL, TS_10M_COL]`` when both frames have ``POINT_ID_COL``
    and ``[TS_10M_COL]`` alone otherwise. Overlapping non-key columns coming from ``right``
    are suffixed with ``_{right_name}``; columns of ``left`` keep their names.

    Raises:
        ValueError: if either frame lacks ``TS_10M_COL``.
    """
    if not (TS_10M_COL in left.columns and TS_10M_COL in right.columns):
        raise ValueError(f"Missing {TS_10M_COL} in join: left_has={TS_10M_COL in left.columns}, right_has={TS_10M_COL in right.columns}")

    both_have_point_id = POINT_ID_COL in left.columns and POINT_ID_COL in right.columns
    keys = [POINT_ID_COL, TS_10M_COL] if both_have_point_id else [TS_10M_COL]

    # merge() returns a new frame; neither input is modified.
    out = left.merge(right, on=keys, how="left", suffixes=("", f"_{right_name}"))

    if both_have_point_id:
        print(f"Joined {right_name} on {keys}. Shape={out.shape}")
    else:
        print(f"Joined {right_name} on {keys} only (no point_id in one side). Shape={out.shape}")
    return out

def latest_per_point(df: pd.DataFrame, n_points: int = N_POINTS) -> pd.DataFrame:
    """Keep the newest row of every point, then return the ``n_points`` most recent of those.

    Rows whose ``TS_10M_COL`` is missing are ignored. The result is ordered by
    ``TS_10M_COL`` descending and has a fresh 0..k-1 index.

    Raises:
        ValueError: if ``POINT_ID_COL`` is not a column of ``df``.
    """
    if POINT_ID_COL not in df.columns:
        raise ValueError(f"{POINT_ID_COL} not found; cannot take latest per point.")

    # After sorting by (point, time), the last row of each point is its newest one.
    newest_per_point = (
        df.dropna(subset=[TS_10M_COL])
        .sort_values([POINT_ID_COL, TS_10M_COL])
        .drop_duplicates(subset=[POINT_ID_COL], keep="last")
    )
    return (
        newest_per_point.sort_values(TS_10M_COL, ascending=False)
        .head(n_points)
        .reset_index(drop=True)
    )


In [23]:
# Read the three source feature groups (traffic, weather, TfL disruptions).
df_tr = read_fg(fs, TRAFFIC_FG_NAME, TRAFFIC_FG_VER)
df_we = read_fg(fs, WEATHER_FG_NAME, WEATHER_FG_VER)
df_tf = read_fg(fs, TFL_FG_NAME, TFL_FG_VER)

print("Raw shapes:")
for _label, _frame in (("traffic", df_tr), ("weather", df_we), ("tfl", df_tf)):
    print(f"{_label}:", _frame.shape)

# Give every frame a UTC ts_10m column so they can be joined on it.
df_tr, df_we, df_tf = (ensure_ts_10m(_frame) for _frame in (df_tr, df_we, df_tf))

print("Has columns:")
for _label, _frame in (("traffic", df_tr), ("weather", df_we), ("tfl", df_tf)):
    # Only the first 25 column names, to keep the output short
    print(f"{_label}:", list(_frame.columns)[:25])


Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.61s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.64s) 
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (4.52s) 
Raw shapes:
traffic: (26157, 33)
weather: (230400, 6)
tfl: (389982, 7)
Has columns:
traffic: ['timestamp_utc', 'point_id', 'frc', 'current_speed', 'free_flow_speed', 'current_travel_time', 'free_flow_travel_time', 'confidence', 'road_closure', 'ts_10m', 'speed_ratio', 'delay_seconds', 'day_of_week', 'is_weekend', 'hour', 'minute', 'is_rush_hour', 'ti_evening_peak', 'ti_midday', 'ti_morning_peak', 'ti_night', 'speed_diff', 'travel_time_ratio', 'low_confidence_flag', 'speed_roll_mean_3']
weather: ['point_id', 'ts_10m', 'precipitation', 'rain', 'snowfall', 'temperature_2m']
tfl: ['point_id', 'ts_10m', 'disruption_count', 'is_active', 'is_incident', 'is_works', 'max_ordinal']


In [24]:
# Enrich the traffic rows with weather first, then with TfL disruptions (left joins).
df_join = join_two(df_tr.copy(), df_we, "weather")
df_join = join_two(df_join, df_tf, "tfl")

# One row per point (its newest), capped at N_POINTS rows.
df_latest = latest_per_point(df_join, n_points=N_POINTS)

print("Inference snapshot shape:", df_latest.shape)
df_latest.loc[:, [POINT_ID_COL, TS_10M_COL]].head(10)


Joined weather on ['point_id', 'ts_10m']. Shape=(26157, 37)
Joined tfl on ['point_id', 'ts_10m']. Shape=(26157, 42)
Inference snapshot shape: (50, 42)


Unnamed: 0,point_id,ts_10m
0,36825,2026-01-11 16:00:00+00:00
1,16228,2026-01-11 16:00:00+00:00
2,26114,2026-01-11 16:00:00+00:00
3,38022,2026-01-11 16:00:00+00:00
4,6097,2026-01-11 16:00:00+00:00
5,26146,2026-01-11 16:00:00+00:00
6,26182,2026-01-11 16:00:00+00:00
7,36942,2026-01-11 16:00:00+00:00
8,26434,2026-01-11 16:00:00+00:00
9,26664,2026-01-11 16:00:00+00:00


In [25]:
def _walk_files(root_dir: str):
    """Lazily yield the path of every file found under ``root_dir`` (recursive, via ``os.walk``)."""
    for folder, _subdirs, filenames in os.walk(root_dir):
        yield from (os.path.join(folder, filename) for filename in filenames)

def _find_saved_model_dir(root_dir: str):
    """Return the directory of the first ``saved_model.pb`` found under ``root_dir``, or ``None``."""
    hits = (
        os.path.dirname(candidate)
        for candidate in _walk_files(root_dir)
        if os.path.basename(candidate) == "saved_model.pb"
    )
    return next(hits, None)

def pick_model_from_registry(mr, model_name: str, model_version: int | None = None):
    if model_version is not None:
        model = mr.get_model(model_name, version=model_version)
        print(f"Using model {model_name} v{model_version}")
        return model

    try:
        model = mr.get_model(model_name, stage="production")
        print(f"Using model {model_name} (stage=production) v{model.version}")
        return model
    except Exception:
        print("No production stage found. Falling back to latest version...")
        models = mr.get_models(model_name)
        if len(models) == 0:
            raise ValueError(f"No models found with name={model_name}")
        latest = sorted(models, key=lambda m: m.version)[-1]
        model = mr.get_model(model_name, version=latest.version)
        print(f"Using model {model_name} v{model.version} (latest)")
        return model

def load_scaler_and_keras_from_dir(local_dir: str):
    """Load the optional scaler and the Keras model stored under ``local_dir``.

    Model lookup: the first file ending in ``.keras`` / ``.h5`` / ``.hdf5``; if there is
    none, the first directory that contains ``saved_model.pb``.

    Scaler lookup: the first ``.pkl`` / ``.joblib`` file whose loaded object has
    ``transform`` and ``fit`` but no ``predict``. A candidate that fails to load is
    reported with a warning and skipped. It is not dropped silently, because running
    inference without the scaler would quietly change the model inputs.

    Args:
        local_dir: directory to search recursively.

    Returns:
        ``(scaler, keras_model)``; ``scaler`` is ``None`` when no suitable artifact loaded.

    Raises:
        FileNotFoundError: if neither a Keras file nor a SavedModel directory is found
            (a limited directory tree is printed first to help debugging).
    """
    all_files = list(_walk_files(local_dir))

    # 1) Keras file (first match in walk order)
    keras_suffixes = (".keras", ".h5", ".hdf5")
    keras_file = next((p for p in all_files if p.lower().endswith(keras_suffixes)), None)

    # 2) SavedModel directory, only looked up when there is no Keras file
    saved_model_dir = _find_saved_model_dir(local_dir) if keras_file is None else None

    # 3) Scaler-like pkl/joblib (object has transform and fit but not predict)
    scaler = None
    for artifact_path in all_files:
        if not artifact_path.lower().endswith((".pkl", ".joblib")):
            continue
        try:
            # NOTE(security): joblib.load unpickles the file, which can run arbitrary code.
            # Only use this on artifacts from a registry you trust.
            obj = joblib.load(artifact_path)
        except Exception as exc:
            print(
                f"WARNING: could not load candidate scaler artifact {artifact_path} "
                f"({type(exc).__name__}: {exc})"
            )
            continue
        if hasattr(obj, "transform") and hasattr(obj, "fit") and not hasattr(obj, "predict"):
            scaler = obj
            print("Loaded scaler artifact:", artifact_path)
            break

    # 4) Load keras model (tensorflow is imported lazily, only when this function runs)
    import tensorflow as tf
    if keras_file is not None:
        keras_model = tf.keras.models.load_model(keras_file)
        print("Loaded Keras model file:", keras_file)
        return scaler, keras_model

    if saved_model_dir is not None:
        keras_model = tf.keras.models.load_model(saved_model_dir)
        print("Loaded SavedModel directory:", saved_model_dir)
        return scaler, keras_model

    # 5) Debug: print small tree (directories deeper than 3 levels are not printed,
    #    and at most 20 files are listed per directory)
    print("Could not find .keras/.h5 or SavedModel. Directory tree (limited):")
    for folder, _subdirs, filenames in os.walk(local_dir):
        depth = folder.replace(local_dir, "").count(os.sep)
        if depth > 3:
            continue
        print("  " * depth + os.path.basename(folder) + "/")
        for filename in filenames[:20]:
            print("  " * (depth + 1) + filename)

    raise FileNotFoundError(f"No Keras model found under {local_dir}")


In [26]:
# Resolve which model to use (MODEL_VERSION=None lets pick_model_from_registry decide)
# and download its artifacts; download() returns the local directory path printed below.
model_meta = pick_model_from_registry(mr, MODEL_NAME, MODEL_VERSION)
local_dir = model_meta.download()
print("Downloaded to:", local_dir)

# Optional: print top-level files to debug fast
# (best effort: any error while listing the directory is deliberately ignored)
try:
    print("Top-level files:", os.listdir(local_dir))
except Exception:
    pass

# scaler is None when no scaler-like artifact was found; keras_model is always set
# (load_scaler_and_keras_from_dir raises FileNotFoundError otherwise).
scaler, keras_model = load_scaler_and_keras_from_dir(local_dir)
print("Scaler loaded:", scaler is not None)
print("Keras model loaded ✅")


No production stage found. Falling back to latest version...
Using model traffic_speed_ratio_keras v2 (latest)


Downloading: 100.000%|██████████| 1535/1535 elapsed<00:00 remaining<?


Downloading model artifact (0 dirs, 1 files)... 

Downloading: 100.000%|██████████| 697206/697206 elapsed<00:00 remaining<00:00


Downloading model artifact (0 dirs, 2 files)... 

Downloading: 100.000%|██████████| 322/322 elapsed<00:00 remaining<?


Downloading model artifact (0 dirs, 3 files)... 

Downloading: 100.000%|██████████| 180/180 elapsed<00:00 remaining<?


Downloading model artifact (0 dirs, 4 files)... 

Downloading: 100.000%|██████████| 956/956 elapsed<00:00 remaining<?

Downloaded to: C:\Users\omarl\AppData\Local\Temp\55e2782c-fc81-4c4d-8016-e34ccc64a7eb\traffic_speed_ratio_keras/2
Top-level files: ['feature_cols.json', 'meta.json', 'metrics.json', 'model.keras', 'scaler.pkl']
Loaded scaler artifact: C:\Users\omarl\AppData\Local\Temp\55e2782c-fc81-4c4d-8016-e34ccc64a7eb\traffic_speed_ratio_keras/2\scaler.pkl
Loaded Keras model file: C:\Users\omarl\AppData\Local\Temp\55e2782c-fc81-4c4d-8016-e34ccc64a7eb\traffic_speed_ratio_keras/2\model.keras
Scaler loaded: True
Keras model loaded ✅





In [27]:
# Output columns to keep
OUTPUT_COLS = [POINT_ID_COL, TS_10M_COL]
OUTPUT_COLS = [c for c in OUTPUT_COLS if c in df_latest.columns]

# Exclude typical non-features (only used when the model ships no feature list)
EXCLUDE_COLS = {
    POINT_ID_COL,
    TS_10M_COL,
    "timestamp",
    "timestamp_utc",
    # possible labels
    "speed_ratio_t+30", "speed_ratio_t+60",
    "label_t+30", "label_t+60",
}

# The downloaded artifact directory contains a feature_cols.json (see the "Top-level files"
# listing printed after the download). When it is present and well-formed, use it so the
# columns fed to the scaler/model have the same set and ORDER as at training time, instead
# of depending on whatever column order the joins happened to produce.
# assumes the file is a flat JSON list of column names — TODO confirm against the training code
feature_cols_path = os.path.join(local_dir, "feature_cols.json")
trained_feature_cols = None
if os.path.isfile(feature_cols_path):
    try:
        with open(feature_cols_path, "r", encoding="utf-8") as fh:
            loaded_cols = json.load(fh)
    except (OSError, ValueError) as exc:
        print(f"WARNING: could not read {feature_cols_path} ({type(exc).__name__}: {exc}); inferring features from df_latest.")
    else:
        if isinstance(loaded_cols, list) and loaded_cols and all(isinstance(c, str) for c in loaded_cols):
            trained_feature_cols = loaded_cols
        else:
            print("WARNING: feature_cols.json is not a flat list of column names; inferring features from df_latest.")

if trained_feature_cols is not None:
    missing_cols = [c for c in trained_feature_cols if c not in df_latest.columns]
    if missing_cols:
        # reindex creates these as all-NaN columns; they become 0.0 in the fillna below
        print("WARNING: features expected by the model but absent from df_latest (filled with 0):", missing_cols)
    feature_cols = list(trained_feature_cols)
    X = df_latest.reindex(columns=feature_cols).copy()
else:
    # Fallback (previous behaviour): every column that is not a key/timestamp/label
    feature_cols = [c for c in df_latest.columns if c not in EXCLUDE_COLS]
    X = df_latest[feature_cols].copy()

print("X shape:", X.shape)
print("Feature cols sample:", feature_cols[:25])

# Convert bool -> int
for c in X.columns:
    if X[c].dtype == bool:
        X[c] = X[c].astype(int)

# Convert object -> numeric if possible (values that cannot be parsed become NaN)
for c in X.columns:
    if X[c].dtype == "object":
        X[c] = pd.to_numeric(X[c], errors="coerce")

# Fill NaNs (ideally match training; default safe)
X = X.fillna(0.0)

# Ensure float32 for keras
X_values = X.values.astype(np.float32)


X shape: (50, 39)
Feature cols sample: ['frc', 'current_speed', 'free_flow_speed', 'current_travel_time', 'free_flow_travel_time', 'confidence', 'road_closure', 'speed_ratio', 'delay_seconds', 'day_of_week', 'is_weekend', 'hour', 'minute', 'is_rush_hour', 'ti_evening_peak', 'ti_midday', 'ti_morning_peak', 'ti_night', 'speed_diff', 'travel_time_ratio', 'low_confidence_flag', 'speed_roll_mean_3', 'speed_roll_std_3', 'delay_roll_mean_3', 'speed_roll_mean_6']


In [28]:
def keras_predict_two_horizons(model, X_np):
    """Run ``model.predict`` and split the result into two flat arrays ``(p30, p60)``.

    Accepted prediction layouts:
      * list/tuple with at least two outputs -> the first two, each flattened;
      * 2-D array with at least two columns  -> column 0 and column 1;
      * 1-D array, or 2-D with one column    -> that output plus an all-NaN array of
        the same length standing in for the second horizon.

    Raises:
        ValueError: for any other prediction shape.
    """
    raw = model.predict(X_np, verbose=0)

    # Multi-output model: one array per horizon.
    if isinstance(raw, (list, tuple)) and len(raw) >= 2:
        first, second = raw[0], raw[1]
        return np.asarray(first).reshape(-1), np.asarray(second).reshape(-1)

    arr = np.asarray(raw)
    if arr.ndim == 2:
        n_outputs = arr.shape[1]
        if n_outputs >= 2:
            return arr[:, 0].reshape(-1), arr[:, 1].reshape(-1)
        if n_outputs == 1:
            return arr.reshape(-1), np.full((arr.shape[0],), np.nan)
    elif arr.ndim == 1:
        return arr.reshape(-1), np.full((arr.shape[0],), np.nan)

    raise ValueError(f"Unexpected prediction shape: {arr.shape}")

# Scale the features when a scaler artifact was loaded; otherwise feed them unchanged.
X_scaled = (
    np.asarray(scaler.transform(X_values)).astype(np.float32)
    if scaler is not None
    else X_values
)

pred_30, pred_60 = keras_predict_two_horizons(keras_model, X_scaled)

print("Pred shapes:", pred_30.shape, pred_60.shape)
print("Pred sample:", pred_30[:5], pred_60[:5])


Pred shapes: (50,) (50,)
Pred sample: [0.89868975 0.8712174  0.73504764 0.84368205 0.8861835 ] [0.90779924 0.8620744  0.7338985  0.8298802  0.8843186 ]


In [29]:
# =========================
# Assemble df_pred: key columns + traffic columns for the UI + both predictions
# =========================

# Key columns, limited to the ones df_latest actually has
KEY_COLS = [c for c in (POINT_ID_COL, TS_10M_COL) if c in df_latest.columns]

# Traffic columns wanted in the UI, limited to the ones df_latest actually has
TRAFFIC_UI_COLS = [
    c
    for c in (
        "frc",
        "current_speed", "free_flow_speed",
        "speed_ratio", "delay_seconds",
        "current_travel_time", "free_flow_travel_time",
        "travel_time_ratio", "speed_diff",
        "confidence", "road_closure",
        "low_confidence_flag",
        "is_rush_hour", "is_weekend",
        "day_of_week", "hour", "minute",
    )
    if c in df_latest.columns
]

df_pred = (
    df_latest[KEY_COLS + TRAFFIC_UI_COLS]
    # predictions are plain arrays, so they are attached by row position
    .assign(**{PRED_COL_30: pred_30, PRED_COL_60: pred_60})
    # point ids as strings, then a stable display order
    .assign(**{POINT_ID_COL: lambda d: d[POINT_ID_COL].astype(str)})
    .sort_values(POINT_ID_COL)
    .reset_index(drop=True)
)

print("df_pred shape:", df_pred.shape)
df_pred.head(10)



df_pred shape: (50, 21)


Unnamed: 0,point_id,ts_10m,frc,current_speed,free_flow_speed,speed_ratio,delay_seconds,current_travel_time,free_flow_travel_time,travel_time_ratio,...,confidence,road_closure,low_confidence_flag,is_rush_hour,is_weekend,day_of_week,hour,minute,pred_speed_ratio_t_30,pred_speed_ratio_t_60
0,16228,2026-01-11 16:00:00+00:00,FRC2,62.0,62.0,1.0,0.0,159.0,159.0,1.0,...,1.0,False,0,1,1,6,16,0,0.871217,0.862074
1,16250,2026-01-11 16:00:00+00:00,FRC3,13.0,16.0,0.8125,15.0,78.0,63.0,1.238095,...,1.0,False,0,1,1,6,16,0,0.864029,0.875404
2,16435,2026-01-11 16:00:00+00:00,FRC3,12.0,18.0,0.666667,138.0,413.0,275.0,1.501818,...,1.0,False,0,1,1,6,16,0,0.861034,0.857013
3,16737,2026-01-11 16:00:00+00:00,FRC3,20.0,20.0,1.0,0.0,316.0,316.0,1.0,...,1.0,False,0,1,1,6,16,0,0.900048,0.901368
4,16741,2026-01-11 16:00:00+00:00,FRC2,24.0,31.0,0.774194,70.0,309.0,239.0,1.292887,...,1.0,False,0,1,1,6,16,0,0.823388,0.836902
5,16755,2026-01-11 16:00:00+00:00,FRC3,19.0,30.0,0.633333,97.0,265.0,168.0,1.577381,...,1.0,False,0,1,1,6,16,0,0.778052,0.762026
6,16756,2026-01-11 16:00:00+00:00,FRC3,9.0,18.0,0.5,50.0,99.0,49.0,2.020408,...,1.0,False,0,1,1,6,16,0,0.65457,0.601123
7,16764,2026-01-11 16:00:00+00:00,FRC2,6.0,23.0,0.26087,213.0,287.0,74.0,3.878378,...,1.0,False,0,1,1,6,16,0,0.605961,0.533973
8,16791,2026-01-11 16:00:00+00:00,FRC3,30.0,36.0,0.833333,15.0,89.0,74.0,1.202703,...,1.0,False,0,1,1,6,16,0,0.864782,0.864342
9,17169,2026-01-11 16:00:00+00:00,FRC2,9.0,16.0,0.5625,130.0,297.0,167.0,1.778443,...,1.0,False,0,1,1,6,16,0,0.68726,0.652561


In [30]:
# =========================
# Attach point metadata to df_pred (left join on point_id, one metadata row per point,
# so df_pred keeps its row count)
# =========================

META_FG_NAME = "traffic_points_metadata"
META_FG_VER = 1

meta_fg = fs.get_feature_group(name=META_FG_NAME, version=META_FG_VER)
df_meta = (
    meta_fg.read()
    # same key dtype as df_pred (string), then keep the last row per point
    .assign(**{POINT_ID_COL: lambda d: d[POINT_ID_COL].astype(str)})
    .drop_duplicates(subset=[POINT_ID_COL], keep="last")
)

df_merged = df_pred.merge(df_meta, on=POINT_ID_COL, how="left", suffixes=("", "_meta"))
print("df_merged shape (should be 50 rows):", df_merged.shape)

# =========================
# Final UI dataframe: preferred column order; names missing from df_merged are skipped
# =========================
ui_cols_order = [
    POINT_ID_COL, TS_10M_COL,
    "latitude", "longitude",
    "road_name", "road_category", "road_type",
    "region_id", "local_authority_id",
    "start_junction_road_name", "end_junction_road_name",
    "link_length_km",
    # traffic + preds already inside df_pred
    "frc", "current_speed", "free_flow_speed", "speed_ratio", "delay_seconds",
    "confidence", "road_closure", "low_confidence_flag", "is_rush_hour",
    PRED_COL_30, PRED_COL_60
]
ui_cols = [name for name in ui_cols_order if name in df_merged.columns]
df_ui = df_merged[ui_cols].copy()

# ts_10m as an ISO-8601 UTC string for the UI
if TS_10M_COL in df_ui.columns:
    df_ui[TS_10M_COL] = pd.to_datetime(
        df_ui[TS_10M_COL], utc=True, errors="coerce"
    ).dt.strftime("%Y-%m-%dT%H:%M:%SZ")

# bool -> int
for col in df_ui.columns:
    if df_ui[col].dtype == bool:
        df_ui[col] = df_ui[col].astype(int)

df_ui = df_ui.sort_values(POINT_ID_COL).reset_index(drop=True)
print("df_ui shape:", df_ui.shape)
df_ui.head(10)


Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.19s) 
df_merged shape (should be 50 rows): (50, 29)
df_ui shape: (50, 20)


Unnamed: 0,point_id,ts_10m,latitude,longitude,road_name,road_category,road_type,local_authority_id,link_length_km,frc,current_speed,free_flow_speed,speed_ratio,delay_seconds,confidence,road_closure,low_confidence_flag,is_rush_hour,pred_speed_ratio_t_30,pred_speed_ratio_t_60
0,16228,2026-01-11T16:00:00Z,51.42891,0.084388,A20,PA,Major,106,2.3,FRC2,62.0,62.0,1.0,0.0,1.0,0,0,1,0.871217,0.862074
1,16250,2026-01-11T16:00:00Z,51.497434,-0.111885,A23,PA,Major,107,0.3,FRC3,13.0,16.0,0.8125,15.0,1.0,0,0,1,0.864029,0.875404
2,16435,2026-01-11T16:00:00Z,51.522479,-0.160005,A4380,PA,Major,109,0.5,FRC3,12.0,18.0,0.666667,138.0,1.0,0,0,1,0.861034,0.857013
3,16737,2026-01-11T16:00:00Z,51.50406,-0.104551,A201,PA,Major,103,0.9,FRC3,20.0,20.0,1.0,0.0,1.0,0,0,1,0.900048,0.901368
4,16741,2026-01-11T16:00:00Z,51.482807,0.06236,A205,PA,Major,105,1.9,FRC2,24.0,31.0,0.774194,70.0,1.0,0,0,1,0.823388,0.836902
5,16755,2026-01-11T16:00:00Z,51.421815,-0.052147,A213,PA,Major,176,1.3,FRC3,19.0,30.0,0.633333,97.0,1.0,0,0,1,0.778052,0.762026
6,16756,2026-01-11T16:00:00Z,51.383489,-0.105944,A213,PA,Major,134,0.3,FRC3,9.0,18.0,0.5,50.0,1.0,0,0,1,0.65457,0.601123
7,16764,2026-01-11T16:00:00Z,51.463984,-0.215483,A219,PA,Major,108,0.5,FRC2,6.0,23.0,0.26087,213.0,1.0,0,0,1,0.605961,0.533973
8,16791,2026-01-11T16:00:00Z,51.389242,-0.305492,A243,PA,Major,178,1.2,FRC3,30.0,36.0,0.833333,15.0,1.0,0,0,1,0.864782,0.864342
9,17169,2026-01-11T16:00:00Z,51.526551,-0.13317,A501,PA,Major,145,0.4,FRC2,9.0,16.0,0.5625,130.0,1.0,0,0,1,0.68726,0.652561


In [31]:
def _json_safe(value):
    """Map missing or non-finite scalars to None so they serialise as JSON ``null``."""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    if isinstance(value, (float, np.floating)) and not np.isfinite(value):
        return None
    return value


# NaN/inf are not valid JSON: json.dumps would emit bare `NaN` / `Infinity` tokens that
# strict parsers (e.g. JSON.parse in a browser) reject. NaN can occur here, for example
# pred_speed_ratio_t_60 is all-NaN for a single-output model, and metadata columns are
# NaN for points without a metadata row (left merge).
prediction_records = [
    {key: _json_safe(value) for key, value in row.items()}
    for row in df_ui.to_dict(orient="records")
]

payload = {
    "generated_at_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    "model_name": MODEL_NAME,
    "model_version": getattr(model_meta, "version", None),
    "n_points": int(len(df_ui)),
    "predictions": prediction_records,
}

# allow_nan=False makes any remaining non-finite float fail loudly instead of
# producing an unparseable file.
json_str = json.dumps(payload, ensure_ascii=False, allow_nan=False)

# NOTE(review): OUT_JSON_PATH is defined in the config cell but the file is written to the
# working directory; the location is left unchanged here. Confirm which path the UI reads.
with open("predictions_latest.json", "w", encoding="utf-8") as f:
    f.write(json_str)

print("Saved predictions_latest.json ✅")
print(json_str[:600], "...")


Saved predictions_latest.json ✅
{"generated_at_utc": "2026-01-11T18:03:41Z", "model_name": "traffic_speed_ratio_keras", "model_version": 2, "n_points": 50, "predictions": [{"point_id": "16228", "ts_10m": "2026-01-11T16:00:00Z", "latitude": 51.4289098, "longitude": 0.08438768, "road_name": "A20", "road_category": "PA", "road_type": "Major", "local_authority_id": 106, "link_length_km": 2.3, "frc": "FRC2", "current_speed": 62.0, "free_flow_speed": 62.0, "speed_ratio": 1.0, "delay_seconds": 0.0, "confidence": 1.0, "road_closure": 0, "low_confidence_flag": 0, "is_rush_hour": 1, "pred_speed_ratio_t_30": 0.8712174296379089, "pred_s ...


In [32]:
# =========================
# Prepare a copy of df_ui for the Feature Group
# =========================

# Work on a copy with point ids as strings
df_fg = df_ui.assign(**{POINT_ID_COL: df_ui[POINT_ID_COL].astype(str)})

# The event time must be a datetime (not a string) for the feature group
if TS_10M_COL in df_fg.columns:
    df_fg[TS_10M_COL] = pd.to_datetime(df_fg[TS_10M_COL], utc=True, errors="coerce")

# bool -> int was already applied to df_ui.
# Make sure the prediction columns are floats.
for pred_col in (PRED_COL_30, PRED_COL_60):
    if pred_col not in df_fg.columns:
        continue
    df_fg[pred_col] = pd.to_numeric(df_fg[pred_col], errors="coerce").astype(float)

print(df_fg.dtypes)
df_fg.head()


point_id                              object
ts_10m                   datetime64[ns, UTC]
latitude                             float64
longitude                            float64
road_name                             object
road_category                         object
road_type                             object
local_authority_id                     int64
link_length_km                       float64
frc                                   object
current_speed                        float64
free_flow_speed                      float64
speed_ratio                          float64
delay_seconds                        float64
confidence                           float64
road_closure                           int32
low_confidence_flag                    int64
is_rush_hour                           int64
pred_speed_ratio_t_30                float64
pred_speed_ratio_t_60                float64
dtype: object


Unnamed: 0,point_id,ts_10m,latitude,longitude,road_name,road_category,road_type,local_authority_id,link_length_km,frc,current_speed,free_flow_speed,speed_ratio,delay_seconds,confidence,road_closure,low_confidence_flag,is_rush_hour,pred_speed_ratio_t_30,pred_speed_ratio_t_60
0,16228,2026-01-11 16:00:00+00:00,51.42891,0.084388,A20,PA,Major,106,2.3,FRC2,62.0,62.0,1.0,0.0,1.0,0,0,1,0.871217,0.862074
1,16250,2026-01-11 16:00:00+00:00,51.497434,-0.111885,A23,PA,Major,107,0.3,FRC3,13.0,16.0,0.8125,15.0,1.0,0,0,1,0.864029,0.875404
2,16435,2026-01-11 16:00:00+00:00,51.522479,-0.160005,A4380,PA,Major,109,0.5,FRC3,12.0,18.0,0.666667,138.0,1.0,0,0,1,0.861034,0.857013
3,16737,2026-01-11 16:00:00+00:00,51.50406,-0.104551,A201,PA,Major,103,0.9,FRC3,20.0,20.0,1.0,0.0,1.0,0,0,1,0.900048,0.901368
4,16741,2026-01-11 16:00:00+00:00,51.482807,0.06236,A205,PA,Major,105,1.9,FRC2,24.0,31.0,0.774194,70.0,1.0,0,0,1,0.823388,0.836902


In [33]:
PRED_FG_NAME = "traffic_predictions_fg"
PRED_FG_VER = 1

# Fetch the prediction feature group if it exists, otherwise define it.
# A single get_or_create call replaces the unconditional get + create pair, which always
# re-declared the group and printed both "already exists" and "Created" on every run.
pred_fg = fs.get_or_create_feature_group(
    name=PRED_FG_NAME,
    version=PRED_FG_VER,
    description="Traffic predictions for +30 and +60 minutes (joined with metadata, ready for UI).",
    primary_key=[POINT_ID_COL, TS_10M_COL],  # history: one row per point and timestamp
    event_time=TS_10M_COL,
)
print("Prediction FG ready ✅")


Prediction FG already exists ✅
Created Prediction FG ✅


In [34]:
# Append every run (keeps history)
# Writes the rows of df_fg (built from df_ui, with ts_10m parsed back to a UTC datetime)
# into the prediction feature group.
pred_fg.insert(df_fg)

print("Inserted predictions to Feature Group ✅")

Feature Group created successfully, explore it at 
https://eu-west.cloud.hopsworks.ai:443/p/3209/fs/3154/fg/2346


Uploading Dataframe: 100.00% |██████████| Rows 50/50 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: traffic_predictions_fg_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3209/jobs/named/traffic_predictions_fg_1_offline_fg_materialization/executions
Inserted predictions to Feature Group ✅
