# Model inference

1. Download model and batch inference data
2. Make predictions
3. Store predictions in per-resort monitoring feature groups

### Connect to Hopsworks

In [276]:
import hopsworks
import xgboost as xgb
import unicodedata
import re
from xgboost import XGBRegressor

import pandas as pd
import numpy as np
import os

# Log in to the Hopsworks project. The API key is read from the environment
# (a missing HOPSWORKS_API_KEY raises KeyError) so no credential lives here.
project = hopsworks.login(
    host="eu-west.cloud.hopsworks.ai",
    project="ID2223_Project",
    api_key_value=os.environ["HOPSWORKS_API_KEY"],
)

# Handles used by the rest of the notebook
fs = project.get_feature_store()   # feature store of the project
mr = project.get_model_registry()  # registry the per-resort models are pulled from

# Feature view (version 5) that supplies the batch inference data
fv = fs.get_feature_view(
    'avalanche_warning_fv_new_corrected_more_features_and_lags',
    version=5,
)

2026-01-11 19:33:22,080 INFO: Closing external client and cleaning up certificates.
2026-01-11 19:33:22,081 INFO: Connection closed.
2026-01-11 19:33:22,083 INFO: Initializing external client
2026-01-11 19:33:22,084 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2026-01-11 19:33:22,944 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/2173


In [255]:
def sanitize_name(name):
    """Return an ASCII-only identifier derived from ``name``.

    The text is NFKD-normalized and encoded to ASCII with errors ignored, so
    accents are stripped and characters without an ASCII decomposition are
    dropped entirely. Every remaining character outside ``[a-zA-Z0-9_]`` is
    then replaced by an underscore.
    """
    ascii_only = (
        unicodedata.normalize('NFKD', name)
        .encode('ASCII', 'ignore')
        .decode()
    )
    return re.sub(r'[^a-zA-Z0-9_]', '_', ascii_only)

def predict(model: xgb.XGBRegressor, features_df: pd.DataFrame) -> float:
    """
    Predict avalanche risk from the first row of ``features_df``.

    Args:
        model: Fitted regressor whose ``predict`` method is called.
        features_df: Feature rows; every column is cast to ``float`` first.

    Returns:
        The first element of the model's prediction, as a Python ``float``.
    """
    numeric_features = features_df.astype(float)
    first_prediction = model.predict(numeric_features)[0]
    return float(first_prediction)

In [256]:
# Pull the feature view's batch (inference) data as a pandas DataFrame
batch_data = fv.get_batch_data(
    dataframe_type="pandas",
)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (6.95s) 


### Download models from Model Registry

In [257]:
from xgboost import XGBRegressor

# One entry per distinct resort found in the batch data (only the keys are used)
resorts = dict.fromkeys(batch_data["location"].unique())

models = {}      # resort name -> loaded XGBRegressor
model_dirs = {}  # resort name -> local directory of the downloaded artifacts

for loc in resorts:
    # Registry names and artifact file names use the sanitized resort name
    loc_ = sanitize_name(loc.replace(" ", "_"))
    print(f"Loading model for {loc}...")

    # Look up version 3 of this resort's model in the registry
    model = mr.get_model(name=f"xgb_avalanche_model_{loc_}", version=3)
    print(model)

    # Fetch the artifacts to a local directory
    model_dir = model.download()

    # Restore the regressor from the JSON file inside the download
    xgb_model = XGBRegressor()
    xgb_model.load_model(f"{model_dir}/xgb_ordinal_model_more_features{loc_}.json")

    # Keep both the model and its artifact directory, keyed by resort name
    models[loc] = xgb_model
    model_dirs[loc] = model_dir

    print(f"✓ Model for {loc} loaded successfully\n")


Loading model for Sauda Ski Centre...
Model(name: 'xgb_avalanche_model_Sauda_Ski_Centre', version: 3)


Downloading: 0.000%|          | 0/396651 elapsed<00:00 remaining<?

✓ Model for Sauda Ski Centre loaded successfullyDONE

Loading model for Hemsedal Skisenter...
Model(name: 'xgb_avalanche_model_Hemsedal_Skisenter', version: 3)


Downloading: 0.000%|          | 0/335400 elapsed<00:00 remaining<?

✓ Model for Hemsedal Skisenter loaded successfullyNE

Loading model for Eikedalen Ski Center AS...
Model(name: 'xgb_avalanche_model_Eikedalen_Ski_Center_AS', version: 3)


Downloading: 0.000%|          | 0/366544 elapsed<00:00 remaining<?

✓ Model for Eikedalen Ski Center AS loaded successfully

Loading model for Myrkdalen Fjellandsby...
Model(name: 'xgb_avalanche_model_Myrkdalen_Fjellandsby', version: 3)


Downloading: 0.000%|          | 0/267407 elapsed<00:00 remaining<?

✓ Model for Myrkdalen Fjellandsby loaded successfully

Loading model for Rauland Skisenter...
Model(name: 'xgb_avalanche_model_Rauland_Skisenter', version: 3)


Downloading: 0.000%|          | 0/386919 elapsed<00:00 remaining<?

✓ Model for Rauland Skisenter loaded successfullyONE

Loading model for Bjorli Ski...
Model(name: 'xgb_avalanche_model_Bjorli_Ski', version: 3)


Downloading: 0.000%|          | 0/399508 elapsed<00:00 remaining<?

✓ Model for Bjorli Ski loaded successfullys)... DONE

Loading model for Strandafjellet Skisenter...
Model(name: 'xgb_avalanche_model_Strandafjellet_Skisenter', version: 3)


Downloading: 0.000%|          | 0/366828 elapsed<00:00 remaining<?

✓ Model for Strandafjellet Skisenter loaded successfully

Loading model for Voss Resort Fjellheisar...
Model(name: 'xgb_avalanche_model_Voss_Resort_Fjellheisar', version: 3)


Downloading: 0.000%|          | 0/601204 elapsed<00:00 remaining<?

✓ Model for Voss Resort Fjellheisar loaded successfully

Loading model for Galdhøpiggen Summer Ski Centre...
Model(name: 'xgb_avalanche_model_Galdhpiggen_Summer_Ski_Centre', version: 3)


Downloading: 0.000%|          | 0/393030 elapsed<00:00 remaining<?

✓ Model for Galdhøpiggen Summer Ski Centre loaded successfully

Loading model for Hovden Alpinsenter...
Model(name: 'xgb_avalanche_model_Hovden_Alpinsenter', version: 3)


Downloading: 0.000%|          | 0/335007 elapsed<00:00 remaining<?

✓ Model for Hovden Alpinsenter loaded successfullyNE

Loading model for Narvik Ski Resort...
Model(name: 'xgb_avalanche_model_Narvik_Ski_Resort', version: 3)


Downloading: 0.000%|          | 0/155542 elapsed<00:00 remaining<?

✓ Model for Narvik Ski Resort loaded successfullyONE

Loading model for Nedre fjellheisstasjon Narvik...
Model(name: 'xgb_avalanche_model_Nedre_fjellheisstasjon_Narvik', version: 3)


Downloading: 0.000%|          | 0/368873 elapsed<00:00 remaining<?

✓ Model for Nedre fjellheisstasjon Narvik loaded successfully



### Get Weather Forecast Features from the Feature Group

In [258]:
# Weather/terrain feature group (version 2)
aq_fg = fs.get_feature_group(name='weather_terrain_sensor', version=2)

# Read the whole group and order the rows newest-first
aq_df = aq_fg.read()
aq_df = aq_df.sort_values(by="date", ascending=False)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.84s) 


In [260]:
# Batch data ordered newest-first
batch_data_sorted = batch_data.sort_values(by="date", ascending=False)

# Keep the 7 most recent weather rows of every resort, then order them
# oldest-first so that the first row of each resort is the earliest of the 7.
latest_7_per_location_weather = (
    aq_df
    .sort_values("date", ascending=False)
    .groupby("location", as_index=False)
    .head(7)
    .sort_values(by="date", ascending=True)
)

# Most recent batch row of EACH resort. Taking the first len(resorts) rows of
# the sorted frame only works when every resort shares the same latest date;
# otherwise some resorts are missing and others appear twice. Dropping
# duplicates on the newest-first frame yields exactly one row per resort.
df_warning_lag = batch_data_sorted.drop_duplicates(subset="location", keep="first")

# One independent DataFrame per resort, keyed by the resort name
dfs_per_location = {}

# sort=False keeps the resorts in order of first appearance
for location, location_weather in latest_7_per_location_weather.groupby("location", sort=False):
    dfs_per_location[location] = location_weather.copy()

    # Downstream cells look the frame up as `df_location_<sanitized name>`,
    # so the same object is also registered under that global name.
    loc_ = sanitize_name(location)
    globals()[f'df_location_{loc_}'] = dfs_per_location[location]

# Columns handed to the model, in the order listed here
feature_cols = [
    # previous warning levels (filled in and shifted in later cells)
    "warning_level_lag_1", "warning_level_lag_2", "warning_level_lag_3",
    # weather columns
    "temperature_2m_mean", "precipitation_sum", "rain_sum", "snowfall_sum",
    "wind_speed_10m_max", "wind_direction_10m_dominant",
    # remaining columns (presumably engineered features; confirm against the feature pipeline)
    "snow_load_steep", "wind_snow_transport", "rain_on_snow_risk",
    "temp_elev", "precip_slope_weighted",
]

In [262]:
# Seed the first row of every resort's frame with its latest known lags
lag_cols = ['warning_level_lag_1', 'warning_level_lag_2', 'warning_level_lag_3']

# Index the lag table by resort once; it does not change inside the loop.
# Duplicate resorts are dropped (first row kept) so that `.loc[location, ...]`
# always returns a single row rather than a DataFrame.
df_warning_lag_idx = (
    df_warning_lag
    .drop_duplicates(subset='location', keep='first')
    .set_index('location')
)

for location in resorts.keys():
    # Same object that is registered in globals() as `df_location_<name>`
    df = dfs_per_location[location]

    # Reset in place: later cells read this very object and address rows
    # by their 0-based label.
    df.reset_index(drop=True, inplace=True)

    # Start with empty lags on every row
    for col in lag_cols:
        df[col] = np.nan

    # Only the first row receives known lags; resorts absent from the lag
    # table keep NaN there.
    if location in df_warning_lag_idx.index:
        df.loc[df.index[0], lag_cols] = df_warning_lag_idx.loc[location, lag_cols]

### Making the predictions

In [271]:
# Roll the model forward row by row: each prediction becomes lag_1 of the
# following row, and the older lags shift back by one.
for location in resorts.keys():
    # Same object that is registered in globals() as `df_location_<name>`
    df = dfs_per_location[location]

    # Initialize lags from the first row (by position, not by a fixed label)
    first_idx = df.index[0]
    lag_1 = df.loc[first_idx, 'warning_level_lag_1']
    lag_2 = df.loc[first_idx, 'warning_level_lag_2']
    lag_3 = df.loc[first_idx, 'warning_level_lag_3']

    for idx in df.index[:7]:
        # Assign current lags to this row
        df.loc[idx, 'warning_level_lag_1'] = lag_1
        df.loc[idx, 'warning_level_lag_2'] = lag_2
        df.loc[idx, 'warning_level_lag_3'] = lag_3

        # Single-row frame holding the model inputs for this row
        features = df.loc[[idx], feature_cols]

        # Predict and store on the SAME row label the features came from
        prediction = predict(models[location], features)
        df.loc[idx, 'predicted_risk_value'] = prediction

        # Shift lags for the next row
        lag_3, lag_2, lag_1 = lag_2, lag_1, prediction

    # 1-based row counter, assigned once per resort after the predictions
    # (kept after the loop so the column order is unchanged).
    df['days_before_forecast_day'] = range(1, len(df) + 1)

### Store predictions in per-resort feature groups

In [273]:
import re

def sanitize_fg_name(name):
    """Turn ``name`` into a lowercase, underscore-separated identifier.

    The input is lowercased, every character outside ``[a-z0-9]`` becomes an
    underscore, runs of underscores collapse to one, leading and trailing
    underscores are removed, and the result is cut to at most 63 characters.
    """
    slug = re.sub(r'[^a-z0-9]', '_', name.lower())
    slug = re.sub(r'_+', '_', slug).strip('_')
    return slug[:63]


In [277]:
# Write every resort's prediction frame to its own feature group
for location in resorts:
    loc_ = sanitize_name(location.replace(" ", "_"))

    # Feature group name derived from the sanitized resort name
    fg_name = sanitize_fg_name(f'aq_predictions_{loc_}')
    fg_options = dict(
        name=fg_name,
        description='Avalanche prediction monitoring with lags',
        version=1,
        primary_key=['location', 'date', 'days_before_forecast_day'],
        event_time='date',
    )
    monitor_fg = fs.get_or_create_feature_group(**fg_options)

    # The frame built in earlier cells is looked up by its global name
    df = globals()[f'df_location_{loc_}']
    monitor_fg.insert(df, wait=True)


2026-01-11 19:33:46,246 INFO: Computing insert statistics
2026-01-11 19:34:03,730 INFO: Computing insert statistics
2026-01-11 19:34:20,736 INFO: Computing insert statistics
2026-01-11 19:34:37,814 INFO: Computing insert statistics
2026-01-11 19:34:54,443 INFO: Computing insert statistics
2026-01-11 19:35:11,454 INFO: Computing insert statistics
2026-01-11 19:35:28,389 INFO: Computing insert statistics
2026-01-11 19:35:45,366 INFO: Computing insert statistics
2026-01-11 19:36:02,263 INFO: Computing insert statistics
2026-01-11 19:36:19,071 INFO: Computing insert statistics
2026-01-11 19:36:36,542 INFO: Computing insert statistics
2026-01-11 19:36:53,739 INFO: Computing insert statistics
