In [3]:
import hopsworks
import pandas as pd

# =========================================================
# 0) Login
# =========================================================
project = hopsworks.login(
    host="eu-west.cloud.hopsworks.ai",
    project="London_traffic"
)
fs = project.get_feature_store()

# =========================================================
# 1) Load original feature groups
# =========================================================
traffic_fg = fs.get_feature_group(name="traffic_temporal_fg", version=1)
weather_fg = fs.get_feature_group(name="weather_hourly_fg", version=1)
tfl_fg     = fs.get_feature_group(name="tfl_disruptions_hourly_fg", version=1)
labels_fg  = fs.get_feature_group(name="labels_speed_ratio_fg", version=1)

# =========================================================
# 2) Helper functions
# =========================================================
def read_fg_as_df(fg):
    # Using Hive is usually the safest for batch reads
    return fg.read(read_options={"use_hive": True})

def ensure_utc_timestamp(df, col):
    # Make sure timestamps are pandas datetime
    df[col] = pd.to_datetime(df[col], utc=True, errors="coerce")
    return df

def make_ts_1h(df, source_time_col, new_col="ts_1h"):
    df = ensure_utc_timestamp(df, source_time_col)
    df[new_col] = df[source_time_col].dt.floor("H")
    return df

def get_or_create_fg(name, version, primary_key, event_time, description):
    try:
        fg = fs.get_feature_group(name=name, version=version)
        print(f"[OK] Feature group exists: {name} v{version}")
        return fg
    except Exception:
        fg = fs.create_feature_group(
            name=name,
            version=version,
            primary_key=primary_key,
            event_time=event_time,
            description=description,
            online_enabled=False
        )
        print(f"[CREATED] Feature group: {name} v{version}")
        return fg

def insert_fg(fg, df):
    # Remove pandas index to avoid accidental columns
    df = df.reset_index(drop=True)
    fg.insert(df, write_options={"wait_for_job": True})
    print(f"[INSERTED] {fg.name} v{fg.version} rows={len(df)}")

# =========================================================
# 3) Prepare TRAFFIC (10-min) -> add ts_1h
#    Keep ts_10m as event_time + join labels on (point_id, ts_10m)
# =========================================================
traffic_prepared_name = "traffic_temporal_prepared_fg"
traffic_prepared_ver  = 1

traffic_prepared_fg = get_or_create_fg(
    name=traffic_prepared_name,
    version=traffic_prepared_ver,
    primary_key=["point_id", "ts_10m"],
    event_time="ts_10m",
    description="Prepared traffic FG: adds ts_1h = floor(ts_10m) for hourly joins."
)

# Only backfill/insert if empty or newly created
try:
    _ = traffic_prepared_fg.read(read_options={"use_hive": True}).head(1)
    print("[SKIP] traffic_prepared_fg already has data (not re-inserting).")
except Exception:
    traffic_df = read_fg_as_df(traffic_fg)
    traffic_df = make_ts_1h(traffic_df, source_time_col="ts_10m", new_col="ts_1h")
    insert_fg(traffic_prepared_fg, traffic_df)

# =========================================================
# 4) Prepare LABELS (10-min) -> add ts_1h (optional but handy)
#    Labels join to traffic on (point_id, ts_10m)
# =========================================================
labels_prepared_name = "labels_speed_ratio_prepared_fg"
labels_prepared_ver  = 1

labels_prepared_fg = get_or_create_fg(
    name=labels_prepared_name,
    version=labels_prepared_ver,
    primary_key=["point_id", "ts_10m"],
    event_time="ts_10m",
    description="Prepared labels FG: keeps 10-min keys and adds ts_1h for alignment/debug."
)

try:
    _ = labels_prepared_fg.read(read_options={"use_hive": True}).head(1)
    print("[SKIP] labels_prepared_fg already has data (not re-inserting).")
except Exception:
    labels_df = read_fg_as_df(labels_fg)
    labels_df = make_ts_1h(labels_df, source_time_col="ts_10m", new_col="ts_1h")
    insert_fg(labels_prepared_fg, labels_df)

# =========================================================
# 5) Prepare WEATHER (hourly) -> rename weather_time_utc -> ts_1h
#    Join on (point_id, ts_1h)
# =========================================================
weather_prepared_name = "weather_hourly_prepared_fg"
weather_prepared_ver  = 1

weather_prepared_fg = get_or_create_fg(
    name=weather_prepared_name,
    version=weather_prepared_ver,
    primary_key=["point_id", "ts_1h"],
    event_time="ts_1h",
    description="Prepared weather FG: renames weather_time_utc to ts_1h for joins."
)

try:
    _ = weather_prepared_fg.read(read_options={"use_hive": True}).head(1)
    print("[SKIP] weather_prepared_fg already has data (not re-inserting).")
except Exception:
    weather_df = read_fg_as_df(weather_fg)
    weather_df = ensure_utc_timestamp(weather_df, "weather_time_utc")
    weather_df = weather_df.rename(columns={"weather_time_utc": "ts_1h"})
    insert_fg(weather_prepared_fg, weather_df)

# =========================================================
# 6) Prepare TFL (hourly) -> rename tfl_time_utc -> ts_1h
#    Join on (point_id, ts_1h)
# =========================================================
tfl_prepared_name = "tfl_disruptions_hourly_prepared_fg"
tfl_prepared_ver  = 1

tfl_prepared_fg = get_or_create_fg(
    name=tfl_prepared_name,
    version=tfl_prepared_ver,
    primary_key=["point_id", "ts_1h"],
    event_time="ts_1h",
    description="Prepared TfL disruptions FG: renames tfl_time_utc to ts_1h for joins."
)

try:
    _ = tfl_prepared_fg.read(read_options={"use_hive": True}).head(1)
    print("[SKIP] tfl_prepared_fg already has data (not re-inserting).")
except Exception:
    tfl_df = read_fg_as_df(tfl_fg)
    tfl_df = ensure_utc_timestamp(tfl_df, "tfl_time_utc")
    tfl_df = tfl_df.rename(columns={"tfl_time_utc": "ts_1h"})
    insert_fg(tfl_prepared_fg, tfl_df)

# =========================================================
# 7) Create Feature View
#    - Join traffic + labels on (point_id, ts_10m)
#    - Join weather + tfl on (point_id, ts_1h)
# =========================================================
LABEL_COLS = ["label_speed_ratio_t_plus_30", "label_speed_ratio_t_plus_60"]

traffic_p = fs.get_feature_group(name=traffic_prepared_name, version=traffic_prepared_ver)
labels_p  = fs.get_feature_group(name=labels_prepared_name, version=labels_prepared_ver)
weather_p = fs.get_feature_group(name=weather_prepared_name, version=weather_prepared_ver)
tfl_p     = fs.get_feature_group(name=tfl_prepared_name, version=tfl_prepared_ver)

# Build query:
# 1) start with traffic (10-min)
# 2) add labels (10-min exact match)
# 3) add weather + tfl using hourly key already inside traffic (ts_1h)
q = (
    traffic_p.select_all()
    .join(labels_p.select(["point_id", "ts_10m"] + LABEL_COLS), on=["point_id", "ts_10m"])
    .join(weather_p.select_all(), on=["point_id", "ts_1h"])
    .join(tfl_p.select_all(), on=["point_id", "ts_1h"])
)

FV_NAME = "traffic_speed_ratio_fv"
FV_VERSION = 1

# Create or load FV
try:
    fv = fs.get_feature_view(name=FV_NAME, version=FV_VERSION)
    print(f"[OK] Feature view already exists: {FV_NAME} v{FV_VERSION}")
except Exception:
    fv = fs.create_feature_view(
        name=FV_NAME,
        version=FV_VERSION,
        query=q,
        labels=LABEL_COLS,
        description=(
            "Training FV: traffic (10-min) + labels (t+30,t+60) joined on (point_id, ts_10m) "
            "+ weather hourly + TfL hourly joined on (point_id, ts_1h)."
        )
    )
    print(f"[CREATED] Feature view: {FV_NAME} v{FV_VERSION}")

# =========================================================
# 8) Quick sanity check (small sample)
# =========================================================
df_preview = fv.get_batch_data(read_options={"use_hive": True})
print("FV shape:", df_preview.shape)
print("Columns sample:", df_preview.columns.tolist()[:40])
df_preview.head()


2026-01-11 13:45:20,288 INFO: Closing external client and cleaning up certificates.
2026-01-11 13:45:20,295 INFO: Connection closed.
2026-01-11 13:45:20,296 INFO: Initializing external client
2026-01-11 13:45:20,297 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2026-01-11 13:45:20,970 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3209
[OK] Feature group exists: traffic_temporal_prepared_fg v1
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.01s) 




AttributeError: 'NoneType' object has no attribute 'insert'