In [None]:
import hopsworks
import sys
import os
from pathlib import Path
import pandas as pd

In [None]:
# The current working directory is treated as the project root.
root_dir = str(Path().absolute())
print(f"Root dir: {root_dir}")

# Locations of the CSV exports, one file per feature group.
INPUT_DIR = Path("data/feature_groups")
TRAFFIC_CSV = INPUT_DIR.joinpath("traffic_features.csv")
WEATHER_CSV = INPUT_DIR.joinpath("weather_features.csv")
CALENDAR_CSV = INPUT_DIR.joinpath("calendar_features.csv")

# Append the root directory to `sys.path` (the interpreter's module search
# path) unless it is already listed, so re-running the cell adds it only once.
root_on_sys_path = root_dir in sys.path
if not root_on_sys_path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

# Read the API key from the environment; the value is None when the variable is unset.
HOPSWORKS_API_KEY = os.getenv("HOPSWORKS_API_KEY")

In [None]:
print("\n--- Connecting to Hopsworks ---")

# Connection settings gathered in one place so they are easy to adjust.
login_settings = {
    "host": "eu-west.cloud.hopsworks.ai",   # DNS of the Hopsworks instance
    "project": "occupancy",                 # Name of the Hopsworks project
    "api_key_value": HOPSWORKS_API_KEY,     # API key value held in HOPSWORKS_API_KEY
}
project = hopsworks.login(**login_settings)

# Handle to the project's feature store, used when creating feature groups.
fs = project.get_feature_store()

In [None]:
def prepare_event_time(df: pd.DataFrame, date_col: str = "date", hour_col: str = "hour") -> pd.DataFrame:
    """
    Return a copy of ``df`` with an ``event_time`` column and a text date column.

    The date column is parsed with ``pd.to_datetime``. When ``hour_col`` is
    present, its values are added to the parsed date as hour offsets to form
    ``event_time``; otherwise ``event_time`` is the parsed date itself.

    The date column is then rewritten as ``YYYY-MM-DD`` strings. Per the
    original note on this function, this is done because online feature
    groups do not accept Timestamp types as primary keys.

    Args:
        df: Input frame; it is not modified.
        date_col: Name of the column holding the date.
        hour_col: Name of the optional column holding the hour.

    Returns:
        A new DataFrame with ``event_time`` added and ``date_col`` as strings.
    """
    prepared = df.copy()
    parsed_dates = pd.to_datetime(prepared[date_col])

    # Only shift by the hour when the frame actually carries an hour column.
    has_hour = hour_col in prepared.columns
    prepared["event_time"] = (
        parsed_dates + pd.to_timedelta(prepared[hour_col], unit="h")
        if has_hour
        else parsed_dates
    )

    # Store the date as text again (see docstring for the reason).
    prepared[date_col] = parsed_dates.dt.strftime("%Y-%m-%d")
    return prepared

In [None]:
# Traffic data: load the CSV and derive event_time in a single chained step.
print(f"Reading {TRAFFIC_CSV}...")
traffic_df = pd.read_csv(TRAFFIC_CSV).pipe(prepare_event_time)

print("\n--- Processing Traffic Feature Group ---")
# Fetch the feature group if it already exists, otherwise define it;
# rows are keyed by route, date and hour.
traffic_fg = fs.get_or_create_feature_group(
    name="skane_traffic",
    version=1,
    primary_key=["route_id", "date", "hour"],
    event_time="event_time",
    online_enabled=True,
    description="Aggregated hourly occupancy data for Skånetrafiken routes",
)

# Upload the prepared rows to the feature group.
traffic_fg.insert(traffic_df)
print("Traffic data inserted.")

In [None]:
# Calendar data: load the CSV and derive event_time in a single chained step.
print(f"Reading {CALENDAR_CSV}...")
calendar_df = pd.read_csv(CALENDAR_CSV).pipe(prepare_event_time)

print("\n--- Processing Calendar Feature Group ---")
# Fetch the feature group if it already exists, otherwise define it;
# rows are keyed by date only.
calendar_fg = fs.get_or_create_feature_group(
    name="sweden_calendar",
    version=1,
    primary_key=["date"],
    event_time="event_time",
    online_enabled=True,
    description="Calendar and holiday data for Sweden",
)

# Upload the prepared rows to the feature group.
calendar_fg.insert(calendar_df)
print("Calendar data inserted.")

In [None]:
# Weather data: load the CSV and derive event_time in a single chained step.
print(f"Reading {WEATHER_CSV}...")
weather_df = pd.read_csv(WEATHER_CSV).pipe(prepare_event_time)

print("\n--- Processing Weather Feature Group ---")
# Fetch the feature group if it already exists, otherwise define it;
# rows are keyed by date and hour.
weather_fg = fs.get_or_create_feature_group(
    name="skane_weather",
    version=1,
    primary_key=["date", "hour"],
    event_time="event_time",
    online_enabled=True,
    description="Hourly weather data for Skåne (OpenMeteo)",
)

# Upload the prepared rows to the feature group.
weather_fg.insert(weather_df)
print("Weather data inserted.")