# Model inference

1. Download the per-resort models and the batch inference data
2. Make predictions
3. Store predictions in per-resort monitoring feature groups

### Connect to Hopsworks

In [254]:
import os
import re
import unicodedata

import hopsworks
import numpy as np
import pandas as pd
import xgboost as xgb
from xgboost import XGBRegressor

# Log in to Hopsworks; the API key is read from the HOPSWORKS_API_KEY environment variable
project = hopsworks.login(
    host="eu-west.cloud.hopsworks.ai",
    project="ID2223_Project",
    api_key_value=os.environ["HOPSWORKS_API_KEY"],
)

# Feature store handle and the feature view used for batch inference
fs = project.get_feature_store()
fv = fs.get_feature_view(
    name='avalanche_warning_fv_new_corrected_more_features_and_lags',
    version=5,
)

# Model registry handle, used further down to download the trained models
mr = project.get_model_registry()

2026-01-11 18:36:52,884 INFO: Closing external client and cleaning up certificates.
2026-01-11 18:36:52,889 INFO: Connection closed.
2026-01-11 18:36:52,891 INFO: Initializing external client
2026-01-11 18:36:52,892 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2026-01-11 18:36:53,774 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/2173


In [255]:
def sanitize_name(name):
    """Return an ASCII-only version of ``name`` restricted to ``[a-zA-Z0-9_]``.

    The text is NFKD-normalized and encoded to ASCII with errors ignored, so
    accents are stripped and characters without an ASCII decomposition are
    dropped. Every remaining character that is not a letter, digit or
    underscore is replaced by an underscore.
    """
    decomposed = unicodedata.normalize('NFKD', name)
    ascii_only = decomposed.encode('ASCII', 'ignore').decode()
    return re.sub(r'[^a-zA-Z0-9_]', '_', ascii_only)

def predict(model: xgb.XGBRegressor, features_df: pd.DataFrame) -> float:
    """Predict avalanche risk from the first row of ``features_df``.

    Every column is cast to float before ``model.predict`` is called; only the
    first element of the model output is returned, converted to a Python float.
    """
    numeric_features = features_df.astype(float)
    first_prediction = model.predict(numeric_features)[0]
    return float(first_prediction)

In [256]:
# Read batch data from the feature view as a pandas DataFrame.
# No start/end time is passed here; later cells use this frame for the resort
# names ("location") and the most recent warning-level lag values.
batch_data = fv.get_batch_data(dataframe_type="pandas")

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (6.95s) 


### Download models from Model Registry

In [257]:
from xgboost import XGBRegressor

# Resort names found in the batch data, kept as dict keys (the values are unused placeholders)
resorts = dict.fromkeys(batch_data["location"].unique())

models = {}      # location -> loaded XGBRegressor
model_dirs = {}  # location -> local directory holding the downloaded artifacts

for loc in resorts:
    loc_ = sanitize_name(loc.replace(" ", "_"))
    print(f"Loading model for {loc}...")

    # Look up this resort's model entry in the registry
    model = mr.get_model(name=f"xgb_avalanche_model_{loc_}", version=3)
    print(model)

    # Download the artifacts, then load the saved model from its JSON file
    model_dir = model.download()
    xgb_model = XGBRegressor()
    xgb_model.load_model(f"{model_dir}/xgb_ordinal_model_more_features{loc_}.json")

    # Keep both the model and its download location, keyed by the original resort name
    models[loc] = xgb_model
    model_dirs[loc] = model_dir

    print(f"✓ Model for {loc} loaded successfully\n")


Loading model for Sauda Ski Centre...
Model(name: 'xgb_avalanche_model_Sauda_Ski_Centre', version: 3)


Downloading: 0.000%|          | 0/396651 elapsed<00:00 remaining<?

✓ Model for Sauda Ski Centre loaded successfullyDONE

Loading model for Hemsedal Skisenter...
Model(name: 'xgb_avalanche_model_Hemsedal_Skisenter', version: 3)


Downloading: 0.000%|          | 0/335400 elapsed<00:00 remaining<?

✓ Model for Hemsedal Skisenter loaded successfullyNE

Loading model for Eikedalen Ski Center AS...
Model(name: 'xgb_avalanche_model_Eikedalen_Ski_Center_AS', version: 3)


Downloading: 0.000%|          | 0/366544 elapsed<00:00 remaining<?

✓ Model for Eikedalen Ski Center AS loaded successfully

Loading model for Myrkdalen Fjellandsby...
Model(name: 'xgb_avalanche_model_Myrkdalen_Fjellandsby', version: 3)


Downloading: 0.000%|          | 0/267407 elapsed<00:00 remaining<?

✓ Model for Myrkdalen Fjellandsby loaded successfully

Loading model for Rauland Skisenter...
Model(name: 'xgb_avalanche_model_Rauland_Skisenter', version: 3)


Downloading: 0.000%|          | 0/386919 elapsed<00:00 remaining<?

✓ Model for Rauland Skisenter loaded successfullyONE

Loading model for Bjorli Ski...
Model(name: 'xgb_avalanche_model_Bjorli_Ski', version: 3)


Downloading: 0.000%|          | 0/399508 elapsed<00:00 remaining<?

✓ Model for Bjorli Ski loaded successfullys)... DONE

Loading model for Strandafjellet Skisenter...
Model(name: 'xgb_avalanche_model_Strandafjellet_Skisenter', version: 3)


Downloading: 0.000%|          | 0/366828 elapsed<00:00 remaining<?

✓ Model for Strandafjellet Skisenter loaded successfully

Loading model for Voss Resort Fjellheisar...
Model(name: 'xgb_avalanche_model_Voss_Resort_Fjellheisar', version: 3)


Downloading: 0.000%|          | 0/601204 elapsed<00:00 remaining<?

✓ Model for Voss Resort Fjellheisar loaded successfully

Loading model for Galdhøpiggen Summer Ski Centre...
Model(name: 'xgb_avalanche_model_Galdhpiggen_Summer_Ski_Centre', version: 3)


Downloading: 0.000%|          | 0/393030 elapsed<00:00 remaining<?

✓ Model for Galdhøpiggen Summer Ski Centre loaded successfully

Loading model for Hovden Alpinsenter...
Model(name: 'xgb_avalanche_model_Hovden_Alpinsenter', version: 3)


Downloading: 0.000%|          | 0/335007 elapsed<00:00 remaining<?

✓ Model for Hovden Alpinsenter loaded successfullyNE

Loading model for Narvik Ski Resort...
Model(name: 'xgb_avalanche_model_Narvik_Ski_Resort', version: 3)


Downloading: 0.000%|          | 0/155542 elapsed<00:00 remaining<?

✓ Model for Narvik Ski Resort loaded successfullyONE

Loading model for Nedre fjellheisstasjon Narvik...
Model(name: 'xgb_avalanche_model_Nedre_fjellheisstasjon_Narvik', version: 3)


Downloading: 0.000%|          | 0/368873 elapsed<00:00 remaining<?

✓ Model for Nedre fjellheisstasjon Narvik loaded successfully



### Get Weather Forecast Features from the Weather Feature Group

In [258]:
# Handle to the weather/terrain feature group
aq_fg = fs.get_feature_group(name='weather_terrain_sensor', version=2)

# Read the feature group and order it newest-first by date
aq_df = (
    aq_fg.read()
    .sort_values(by="date", ascending=False)
)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.84s) 


In [259]:
# Defined here, before it is displayed, so this cell works under
# Restart Kernel -> Run All instead of relying on leftover kernel state.
latest_7_per_location_weather = (
    aq_df
    .sort_values("date", ascending=False)
    .groupby("location", as_index=False)
    .head(7)                                 # 7 newest rows per location
    .sort_values(by="date", ascending=True)  # oldest of those dates first
)
latest_7_per_location_weather

Unnamed: 0,date,temperature_2m_mean,precipitation_sum,rain_sum,snowfall_sum,wind_speed_10m_max,wind_direction_10m_dominant,location,snow_load_steep,wind_snow_transport,rain_on_snow_risk,temp_elev,precip_slope_weighted
22170,2026-01-10 00:00:00+00:00,-19.944500,0.0,0.0,0.00,3.240000,270.000000,Hemsedal Skisenter,0.000000,0.382837,0.0,-11.462846,0.000000
22175,2026-01-10 00:00:00+00:00,-13.586500,0.0,0.0,0.00,1.800000,216.869980,Galdhøpiggen Summer Ski Centre,0.000000,0.346514,0.0,-7.039772,0.000000
22174,2026-01-10 00:00:00+00:00,-16.488001,0.0,0.0,0.00,1.938659,21.801476,Voss Resort Fjellheisar,0.000000,0.292608,0.0,-4.482924,0.000000
22173,2026-01-10 00:00:00+00:00,-11.367001,0.0,0.0,0.00,2.160000,180.000000,Eikedalen Ski Center AS,0.000000,0.349119,0.0,-4.378534,0.000000
22164,2026-01-10 00:00:00+00:00,-19.271000,0.0,0.0,0.00,0.804985,333.435028,Hovden Alpinsenter,0.000000,0.060642,0.0,-9.584816,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22238,2026-01-16 00:00:00+00:00,-6.094500,0.1,0.0,0.07,9.746631,94.236320,Hemsedal Skisenter,0.004434,1.151657,0.0,-3.502736,1.639558
22237,2026-01-16 00:00:00+00:00,-0.936500,0.0,0.0,0.00,3.319036,167.471207,Galdhøpiggen Summer Ski Centre,0.000000,0.638939,0.0,-0.485242,0.000000
22236,2026-01-16 00:00:00+00:00,-6.522000,0.0,0.0,0.00,9.290511,125.537766,Bjorli Ski,0.000000,1.379674,0.0,-4.316296,0.000000
22240,2026-01-16 00:00:00+00:00,-0.252500,0.0,0.0,0.00,13.397612,149.300354,Strandafjellet Skisenter,0.000000,2.870462,0.0,-0.037345,0.000000


In [260]:
# Batch data ordered newest-first, so the first row met for a location is its latest one
batch_data_sorted = batch_data.sort_values(by="date", ascending=False)

# Forecast window: the 7 newest weather rows per location,
# re-sorted ascending so the earliest of those dates comes first
latest_7_per_location_weather = (
    aq_df
    .sort_values("date", ascending=False)
    .groupby("location", as_index=False)
    .head(7)
    .sort_values(by="date", ascending=True)
)

# Latest warning-lag row for each resort.
# Taking the first row per location (instead of the first len(resorts) rows overall)
# still yields exactly one row per resort when some resort has no row on the newest
# date or another resort has more than one.
df_warning_lag = batch_data_sorted.drop_duplicates(subset="location", keep="first")

# One weather DataFrame per location, keyed by the original location name
dfs_per_location = {}

for location in latest_7_per_location_weather['location'].unique():
    # sanitize_name already maps spaces to underscores, so this matches the
    # names built elsewhere with location.replace(" ", "_")
    loc_ = sanitize_name(location)

    # Independent copy so later in-place edits do not touch the source frame
    is_location = latest_7_per_location_weather['location'] == location
    dfs_per_location[location] = latest_7_per_location_weather[is_location].copy()

    # Later cells look these frames up by name (df_location_<sanitized name>),
    # so the same object is also published as a global variable
    globals()[f'df_location_{loc_}'] = dfs_per_location[location]

# Columns selected from each row, in this order, when building the model input
feature_cols = [
    "warning_level_lag_1",
    "warning_level_lag_2",
    "warning_level_lag_3",
    "temperature_2m_mean",
    "precipitation_sum",
    "rain_sum",
    "snowfall_sum",
    "wind_speed_10m_max",
    "wind_direction_10m_dominant",
    "snow_load_steep",
    "wind_snow_transport",
    "rain_on_snow_risk",
    "temp_elev",
    "precip_slope_weighted",
]

In [261]:
# Inspect the rows that supply the initial warning-level lags
df_warning_lag

Unnamed: 0,location,date,warning_level_lag_1,warning_level_lag_2,warning_level_lag_3,temperature_2m_mean,precipitation_sum,rain_sum,snowfall_sum,wind_speed_10m_max,wind_direction_10m_dominant,snow_load_steep,wind_snow_transport,rain_on_snow_risk,temp_elev,precip_slope_weighted
21970,Nedre fjellheisstasjon Narvik,2026-01-11 00:00:00+00:00,2,3,3,-6.8155,0.0,0.0,0.0,9.339208,62.447273,0.0,0.25357,0.0,-0.155262,0.0
16423,Sauda Ski Centre,2026-01-11 00:00:00+00:00,2,2,2,-15.389,0.0,0.0,0.0,9.746631,85.763687,0.0,1.68746,0.0,-5.331246,0.0
19194,Voss Resort Fjellheisar,2026-01-11 00:00:00+00:00,2,2,2,-12.938,0.0,0.0,0.0,1.8,53.13002,0.0,0.27168,0.0,-3.517714,0.0
19193,Strandafjellet Skisenter,2026-01-11 00:00:00+00:00,2,2,2,-11.1525,0.0,0.0,0.0,8.404285,136.735672,0.0,1.800633,0.0,-1.649454,0.0
2770,Narvik Ski Resort,2026-01-11 00:00:00+00:00,2,3,3,-6.8155,0.0,0.0,0.0,9.339208,62.447273,0.0,0.279712,0.0,-0.113311,0.0
2769,Hovden Alpinsenter,2026-01-11 00:00:00+00:00,2,2,1,-16.971001,0.0,0.0,0.0,4.89653,143.972534,0.0,0.368871,0.0,-8.440865,0.0
4138,Eikedalen Ski Center AS,2026-01-11 00:00:00+00:00,2,2,2,-9.917,0.0,0.0,0.0,9.0,106.260277,0.0,1.454662,0.0,-3.819998,0.0
4139,Myrkdalen Fjellandsby,2026-01-11 00:00:00+00:00,2,2,2,-13.996,0.0,0.0,0.0,8.209263,142.124954,0.0,1.022776,0.0,-7.516764,0.0
8278,Bjorli Ski,2026-01-11 00:00:00+00:00,2,2,2,-16.271999,0.0,0.0,0.0,6.989936,124.508583,0.0,1.03803,0.0,-10.768901,0.0
1388,Hemsedal Skisenter,2026-01-11 00:00:00+00:00,1,1,1,-16.3445,0.0,0.0,0.0,1.8,180.0,0.0,0.212687,0.0,-9.393792,0.0


In [262]:
# Add the lag features for the first forecast row of each location
lag_cols = ['warning_level_lag_1', 'warning_level_lag_2', 'warning_level_lag_3']

# Lookup table keyed by location, built once because it does not change inside the loop.
# Only the first row per location is kept so that .loc[location, ...] always
# returns a single row rather than a multi-row frame.
df_warning_lag_idx = (
    df_warning_lag
    .drop_duplicates(subset='location', keep='first')
    .set_index('location')
)

for location in resorts.keys():
    loc_ = sanitize_name(location.replace(" ", "_"))
    df = globals()[f'df_location_{loc_}']

    # In place on purpose: the global frame must itself get a 0..n-1 index,
    # because the prediction cell reads the seed lags from label 0
    df.reset_index(drop=True, inplace=True)

    # Every row starts with missing lags; only the first row is filled below
    for col in lag_cols:
        df[col] = np.nan

    # Locations without a lag row keep NaN lags
    if location in df_warning_lag_idx.index:
        df.loc[df.index[0], lag_cols] = df_warning_lag_idx.loc[location, lag_cols]

In [263]:
# Example for one resort: only the first row has lag values at this point
df_location_Bjorli_Ski

Unnamed: 0,date,temperature_2m_mean,precipitation_sum,rain_sum,snowfall_sum,wind_speed_10m_max,wind_direction_10m_dominant,location,snow_load_steep,wind_snow_transport,rain_on_snow_risk,temp_elev,precip_slope_weighted,warning_level_lag_1,warning_level_lag_2,warning_level_lag_3
0,2026-01-11 00:00:00+00:00,-16.271999,0.0,0.0,0.0,6.989936,124.508583,Bjorli Ski,0.0,1.03803,0.0,-10.768901,0.0,2.0,2.0,2.0
1,2026-01-12 00:00:00+00:00,-12.422,0.1,0.0,0.07,15.315873,119.577744,Bjorli Ski,0.00567,2.274462,0.0,-8.22095,1.721069,,,
2,2026-01-13 00:00:00+00:00,-10.872001,0.0,0.0,0.0,12.313894,127.875046,Bjorli Ski,0.0,1.828657,0.0,-7.195152,0.0,,,
3,2026-01-14 00:00:00+00:00,-5.872,0.2,0.0,0.14,13.397612,120.699646,Bjorli Ski,0.01134,1.989593,0.0,-3.886123,3.442139,,,
4,2026-01-15 00:00:00+00:00,-5.622,0.0,0.0,0.0,7.704336,127.405434,Bjorli Ski,0.0,1.144121,0.0,-3.720671,0.0,,,
5,2026-01-16 00:00:00+00:00,-6.322,0.0,0.0,0.0,6.109403,135.000107,Bjorli Ski,0.0,0.907268,0.0,-4.183935,0.0,,,
6,2026-01-17 00:00:00+00:00,-6.572,0.0,0.0,0.0,6.193674,144.462234,Bjorli Ski,0.0,0.919783,0.0,-4.349387,0.0,,,


### Making the predictions

In [271]:
# Recursive forecast per resort: each day's prediction becomes lag_1 of the next day
for location in resorts.keys():
    loc_ = sanitize_name(location.replace(" ", "_"))
    df = globals()[f'df_location_{loc_}']

    # Seed the lags from the first row (the index was reset to 0..n-1 beforehand)
    first_idx = df.index[0]
    lag_1 = df.loc[first_idx, 'warning_level_lag_1']
    lag_2 = df.loc[first_idx, 'warning_level_lag_2']
    lag_3 = df.loc[first_idx, 'warning_level_lag_3']

    for idx in df.index[:7]:

        # Write the current lags into this row
        df.loc[idx, 'warning_level_lag_1'] = lag_1
        df.loc[idx, 'warning_level_lag_2'] = lag_2
        df.loc[idx, 'warning_level_lag_3'] = lag_3

        # Single-row frame holding the model inputs in feature_cols order
        features = df.loc[[idx], feature_cols]

        # Store the prediction under the same label the features were read from.
        # Using a positional counter with .loc would add a new row whenever the
        # labels are not 0..n-1.
        prediction = predict(models[location], features)
        df.loc[idx, 'predicted_risk_value'] = prediction

        # Shift lags for the next day
        lag_3, lag_2, lag_1 = lag_2, lag_1, prediction

    # 1-based position of each row within the forecast window; assigned once
    # per resort, after the loop, instead of on every iteration
    df['days_before_forecast_day'] = range(1, len(df) + 1)

In [272]:
# Example of predictions for one resort: lags filled row by row,
# plus the predicted_risk_value and days_before_forecast_day columns
df_location_Bjorli_Ski

Unnamed: 0,date,temperature_2m_mean,precipitation_sum,rain_sum,snowfall_sum,wind_speed_10m_max,wind_direction_10m_dominant,location,snow_load_steep,wind_snow_transport,rain_on_snow_risk,temp_elev,precip_slope_weighted,warning_level_lag_1,warning_level_lag_2,warning_level_lag_3,predicted_risk_value,days_before_forecast_day
0,2026-01-11 00:00:00+00:00,-16.271999,0.0,0.0,0.0,6.989936,124.508583,Bjorli Ski,0.0,1.03803,0.0,-10.768901,0.0,2.0,2.0,2.0,2.036374,1
1,2026-01-12 00:00:00+00:00,-12.422,0.1,0.0,0.07,15.315873,119.577744,Bjorli Ski,0.00567,2.274462,0.0,-8.22095,1.721069,2.036374,2.0,2.0,2.084401,2
2,2026-01-13 00:00:00+00:00,-10.872001,0.0,0.0,0.0,12.313894,127.875046,Bjorli Ski,0.0,1.828657,0.0,-7.195152,0.0,2.084401,2.036374,2.0,2.016418,3
3,2026-01-14 00:00:00+00:00,-5.872,0.2,0.0,0.14,13.397612,120.699646,Bjorli Ski,0.01134,1.989593,0.0,-3.886123,3.442139,2.016418,2.084401,2.036374,1.945868,4
4,2026-01-15 00:00:00+00:00,-5.622,0.0,0.0,0.0,7.704336,127.405434,Bjorli Ski,0.0,1.144121,0.0,-3.720671,0.0,1.945868,2.016418,2.084401,1.214123,5
5,2026-01-16 00:00:00+00:00,-6.322,0.0,0.0,0.0,6.109403,135.000107,Bjorli Ski,0.0,0.907268,0.0,-4.183935,0.0,1.214123,1.945868,2.016418,1.118617,6
6,2026-01-17 00:00:00+00:00,-6.572,0.0,0.0,0.0,6.193674,144.462234,Bjorli Ski,0.0,0.919783,0.0,-4.349387,0.0,1.118617,1.214123,1.945868,1.102013,7


### Store Predictions in the Feature Store

In [273]:
import re

def sanitize_fg_name(name):
    """Turn ``name`` into a lowercase feature-group name of at most 63 characters.

    Steps: lowercase the text, replace every character outside ``[a-z0-9]``
    with an underscore, collapse runs of underscores, strip leading and
    trailing underscores, and truncate to 63 characters.

    The result never ends with an underscore, even when the 63-character cut
    lands right after one.
    """
    name = name.lower()
    name = re.sub(r'[^a-z0-9]', '_', name)
    name = re.sub(r'_+', '_', name)  # collapse multiple underscores
    name = name.strip('_')  # remove leading/trailing underscores
    # Strip again after truncating: the cut itself can expose a trailing underscore
    return name[:63].rstrip('_')


In [274]:
# Write each resort's prediction frame to its own monitoring feature group
for location in resorts:
    loc_ = sanitize_name(location.replace(" ", "_"))
    fg_name = sanitize_fg_name(f'aq_predictions_{loc_}')

    # Fetch the feature group, or create it if it does not exist yet
    monitor_fg = fs.get_or_create_feature_group(
        name=fg_name,
        version=1,
        description='Avalanche prediction monitoring with lags',
        primary_key=['location', 'date', 'days_before_forecast_day'],
        event_time='date',
    )

    # Look up the per-resort frame published as a global and insert it,
    # passing wait=True as in every other run of this cell
    df = globals()[f'df_location_{loc_}']
    monitor_fg.insert(df, wait=True)


2026-01-11 18:53:02,267 INFO: Computing insert statistics
2026-01-11 18:53:16,522 INFO: Computing insert statistics
2026-01-11 18:53:30,490 INFO: Computing insert statistics
2026-01-11 18:53:44,052 INFO: Computing insert statistics
2026-01-11 18:53:58,421 INFO: Computing insert statistics
2026-01-11 18:54:12,622 INFO: Computing insert statistics
2026-01-11 18:54:26,882 INFO: Computing insert statistics
2026-01-11 18:54:41,350 INFO: Computing insert statistics
2026-01-11 18:54:55,586 INFO: Computing insert statistics
2026-01-11 18:55:09,731 INFO: Computing insert statistics
2026-01-11 18:55:24,602 INFO: Computing insert statistics
2026-01-11 18:55:40,562 INFO: Computing insert statistics
