# Model Training

1. Retrieve all feature groups
2. Join features
3. Create a model per resort location (filter by location + sort)
4. Train XGBoost + validation
5. Check performance

In [62]:
import xgboost
import pandas as pd
import datetime
import hopsworks
import warnings
import matplotlib.pyplot as plt
import os
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, mean_absolute_error
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
from scipy.stats import uniform, randint
from locations import *

warnings.filterwarnings("ignore", module="IPython")


In [63]:
project = hopsworks.login(
    host="eu-west.cloud.hopsworks.ai",             # DNS of your Hopsworks instance
    project="ID2223_Project"
)

fs = project.get_feature_store()

2025-12-31 15:58:35,819 INFO: Closing external client and cleaning up certificates.
2025-12-31 15:58:35,832 INFO: Connection closed.
2025-12-31 15:58:35,839 INFO: Initializing external client
2025-12-31 15:58:35,840 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-31 15:58:40,894 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/2173


## Create Feature View

In [64]:
warning_fg = fs.get_feature_group(
    name='avalanche_warning',
    version=4
)

weather_fg = fs.get_feature_group(
    name="weather_terrain_sensor",
    version=2
)

In [65]:
selected_features = (
    warning_fg
        .select(["location", "date", "warning_level"])
        .join(
            weather_fg.select([
                "temperature_2m_mean",
                "precipitation_sum",
                "rain_sum",
                "snowfall_sum",
                "wind_speed_10m_max",
                "wind_direction_10m_dominant",
                "snow_load_steep",
                "wind_snow_transport",
                "rain_on_snow_risk",
                "temp_elev",
                "precip_slope_weighted"]),
            on=["location", "date"]
        )
)
selected_features.features


[Feature('location', None, None, False, False, False, None, None, None),
 Feature('date', None, None, False, False, False, None, None, None),
 Feature('temperature_2m_mean', None, None, False, False, False, None, None, None),
 Feature('precipitation_sum', None, None, False, False, False, None, None, None),
 Feature('rain_sum', None, None, False, False, False, None, None, None),
 Feature('snowfall_sum', None, None, False, False, False, None, None, None),
 Feature('wind_speed_10m_max', None, None, False, False, False, None, None, None),
 Feature('wind_direction_10m_dominant', None, None, False, False, False, None, None, None),
 Feature('snow_load_steep', None, None, False, False, False, None, None, None),
 Feature('wind_snow_transport', None, None, False, False, False, None, None, None),
 Feature('rain_on_snow_risk', None, None, False, False, False, None, None, None),
 Feature('temp_elev', None, None, False, False, False, None, None, None),
 Feature('precip_slope_weighted', None, None, F

In [None]:
feature_view = fs.create_feature_view(
    name="avalanche_warning_fv_new_corrected_more_features",
    version=2,
    description="Feature view combining avalanche warnings, weather forecasts, and static terrain features for Norwegian ski resorts.",
    query=selected_features,
    labels=["warning_level"]
)

## Train-test splits

In [68]:
fv = fs.get_feature_view(
    name="avalanche_warning_fv_new_corrected_more_features",
    version=2
)

In [69]:
start_date_test_data = "2025-06-30"
# Convert string to datetime object
test_start = datetime.datetime.strptime(start_date_test_data, "%Y-%m-%d")
X_train, X_test, y_train, y_test = fv.train_test_split(
    test_start=test_start
)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (5.28s) 
2025-12-31 16:01:50,078 INFO: Computing insert statistics
2025-12-31 16:01:50,443 INFO: Computing insert statistics



In [70]:
print(len(X_train[X_train['location'] == 'Galdhøpiggen Summer Ski Centre'].sort_values(by='date')))
X_train[X_train['location'] == 'Galdhøpiggen Summer Ski Centre']

1643


Unnamed: 0,location,date,temperature_2m_mean,precipitation_sum,rain_sum,snowfall_sum,wind_speed_10m_max,wind_direction_10m_dominant,snow_load_steep,wind_snow_transport,rain_on_snow_risk,temp_elev,precip_slope_weighted
228,Galdhøpiggen Summer Ski Centre,2021-02-03 00:00:00+00:00,-19.653915,0.0,0.0,0.00,3.054701,212.763855,0.000000,0.588053,0.000000,-10.183571,0.000000
229,Galdhøpiggen Summer Ski Centre,2021-02-08 00:00:00+00:00,-21.889334,0.0,0.0,0.00,7.928177,223.419846,0.000000,1.526234,0.000000,-11.341841,0.000000
230,Galdhøpiggen Summer Ski Centre,2021-02-09 00:00:00+00:00,-21.543503,0.0,0.0,0.00,7.993297,215.749039,0.000000,1.538770,0.000000,-11.162651,0.000000
231,Galdhøpiggen Summer Ski Centre,2021-02-20 00:00:00+00:00,-2.324750,1.3,0.0,0.91,6.915374,50.984947,0.109549,1.331262,0.000000,-1.204557,24.578027
232,Galdhøpiggen Summer Ski Centre,2021-03-16 00:00:00+00:00,-6.147667,0.0,0.0,0.00,6.924738,205.346115,0.000000,1.333064,0.000000,-3.185381,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20834,Galdhøpiggen Summer Ski Centre,2025-05-27 00:00:00+00:00,5.391917,4.2,4.2,0.00,10.120099,108.325500,0.000000,1.948195,4.352415,2.793793,79.405919
20835,Galdhøpiggen Summer Ski Centre,2025-06-08 00:00:00+00:00,7.673166,4.1,3.8,0.21,8.647496,273.208221,0.025281,1.664708,3.937899,3.975810,77.515304
20836,Galdhøpiggen Summer Ski Centre,2025-06-11 00:00:00+00:00,6.614834,1.9,1.1,0.56,11.638917,294.712402,0.067415,2.240579,1.139918,3.427441,35.921731
20837,Galdhøpiggen Summer Ski Centre,2025-06-16 00:00:00+00:00,11.698166,0.1,0.1,0.00,14.861722,256.234772,0.000000,2.860993,0.103629,6.061342,1.890617


In [77]:
def prepare_data(X, y_enc, location):
    df = (
        X.assign(label=y_enc)
        .loc[X["location"] == location]
        .sort_values("date")
        .reset_index(drop=True)
    )

    X_loc = df.drop(columns=["location", "date", "label"])
    y_loc = df["label"].to_numpy()

    return X_loc, y_loc

In [78]:
if os.path.exists("models") == False:
    os.mkdir("models")

label_encoder = LabelEncoder()
y_train_enc = label_encoder.fit_transform(y_train.to_numpy().ravel())
y_test_enc  = label_encoder.transform(y_test.to_numpy().ravel())

param_distributions = {
    "n_estimators": randint(200, 1200),
    "max_depth": randint(3, 12),
    "learning_rate": uniform(0.01, 0.2),
    "subsample": uniform(0.5, 0.5),
    "colsample_bytree": uniform(0.5, 0.5),
    "gamma": uniform(0, 5)
}

metrics = {}

for loc in resorts.keys():
    
    print(f"Training and evaluating model for location: {loc}")

    X_train_loc, y_train_loc = prepare_data(X_train, y_train_enc, loc)
    X_test_loc,  y_test_loc  = prepare_data(X_test,  y_test_enc,  loc)

    tscv = TimeSeriesSplit(n_splits=5)

    model = xgboost.XGBClassifier(
        objective="multi:softprob",
        num_class=len(label_encoder.classes_),
        eval_metric="mlogloss",
        random_state=42
   )

    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_distributions,
        n_iter=50,
        scoring="accuracy",
        cv=tscv,
        verbose=2,
        n_jobs=-1,
        random_state=42
    )

    random_search.fit(X_train_loc, y_train_loc)
    best_model = random_search.best_estimator_
    
    best_model.save_model(f"models/xgb_model_{loc.replace(' ', '_')}.json")
    
    y_pred_enc = best_model.predict(X_test_loc)
    acc_score = accuracy_score(y_test_loc, y_pred_enc)
    metrics[loc] = acc_score

    print(f"Accuracy Score: {acc_score:.3f}")
       


Training and evaluating model for location: Narvik Ski Resort
Fitting 5 folds for each of 50 candidates, totalling 250 fits
Accuracy Score: 0.703
Training and evaluating model for location: Strandafjellet Skisenter
Fitting 5 folds for each of 50 candidates, totalling 250 fits
Accuracy Score: 0.735
Training and evaluating model for location: Voss Resort Fjellheisar
Fitting 5 folds for each of 50 candidates, totalling 250 fits
Accuracy Score: 0.692
Training and evaluating model for location: Myrkdalen Fjellandsby
Fitting 5 folds for each of 50 candidates, totalling 250 fits
Accuracy Score: 0.681
Training and evaluating model for location: Nedre fjellheisstasjon Narvik
Fitting 5 folds for each of 50 candidates, totalling 250 fits
Accuracy Score: 0.703
Training and evaluating model for location: Eikedalen Ski Center AS
Fitting 5 folds for each of 50 candidates, totalling 250 fits
Accuracy Score: 0.692
Training and evaluating model for location: Hemsedal Skisenter
Fitting 5 folds for each o

In [80]:
if os.path.exists("models") == False:
    os.mkdir("models")

y_train_ord = y_train.to_numpy().ravel().astype(float)
y_test_ord  = y_test.to_numpy().ravel().astype(float)


param_distributions = {
    "n_estimators": randint(200, 1200),
    "max_depth": randint(3, 12),
    "learning_rate": uniform(0.01, 0.2),
    "subsample": uniform(0.5, 0.5),
    "colsample_bytree": uniform(0.5, 0.5),
    "gamma": uniform(0, 5)
}

for loc in resorts.keys():

    print(f"\nTraining and evaluating model for location: {loc}")

    X_train_loc, y_train_loc = prepare_data(X_train, y_train_ord, loc)
    X_test_loc,  y_test_loc  = prepare_data(X_test,  y_test_ord,  loc)

    tscv = TimeSeriesSplit(n_splits=5)

    model = xgboost.XGBRegressor(
        objective="reg:squarederror",
        eval_metric="rmse",
        random_state=42
    )

    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_distributions,
        n_iter=50,
        scoring="neg_mean_absolute_error",
        cv=tscv,
        n_jobs=-1,
        verbose=2,
        random_state=42
    )

    random_search.fit(X_train_loc, y_train_loc)
    best_model = random_search.best_estimator_

    # Save model
    model_path = f"models/xgb_ordinal_model_{loc.replace(' ', '_')}.json"
    best_model.save_model(model_path)

    # Predict (continuous)
    y_pred_cont = best_model.predict(X_test_loc)

    # Convert to ordinal classes
    y_pred_ord = np.round(y_pred_cont)
    y_pred_ord = np.clip(y_pred_ord, 0, 5)

    # Metrics
    mae = mean_absolute_error(y_test_loc, y_pred_ord)
    acc = accuracy_score(y_test_loc, y_pred_ord)
    print(y_pred_ord)
    print(y_test_ord)

    print(f"MAE: {mae:.3f}")
    print(f"Rounded Accuracy: {acc:.3f}")


Training and evaluating model for location: Narvik Ski Resort
Fitting 5 folds for each of 50 candidates, totalling 250 fits
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 2. 1. 1. 2. 2. 1. 1. 1. 1. 1. 1. 1.
 0. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 2. 2. 2. 1. 1. 1. 2. 2. 2. 1. 2. 1. 2.
 2. 2. 2. 2. 1. 2. 2. 2. 2. 2. 2. 2. 2. 1. 1. 1. 1. 1. 1. 2. 1. 2. 2. 2.
 2. 1. 1. 1. 2. 1. 2. 2. 1. 3. 1. 1. 3. 3. 2. 2. 2.]
[0. 0. 0. ... 0. 0. 1.]
MAE: 0.465
Rounded Accuracy: 0.649

Training and evaluating model for location: Strandafjellet Skisenter
Fitting 5 folds for each of 50 candidates, totalling 250 fits
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0

## Save to model registry

In [81]:
model_registry = project.get_model_registry()
#use hash of location name to create unique model name
for loc in resorts.keys():
    loc_ = loc.replace(' ', '_')
    hash_loc = abs(hash(loc_)) % (10 ** 8)
    model_path = f"models/xgb_model_{loc_}.json"
    model_ = model_registry.python.create_model(
        name=f"xgb_avalanche_model_{hash_loc}",
        description=f"XGBoost model for avalanche warning level prediction at {loc}",
        feature_view=feature_view,
        metrics ={"accuracy": metrics[loc]},
    )
    model_.save(model_path)

  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Narvik…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_91344179/2


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Strand…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_57866374/2


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Voss_R…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_62405589/2


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Myrkda…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_80045427/2


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Nedre_…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_61248981/2


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Eikeda…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_32331301/2


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Hemsed…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_26965687/2


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Raulan…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_54834800/2


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Galdhø…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_2208980/2


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Sauda_…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_25562069/2


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Hovden…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_37171150/2


  0%|          | 0/6 [00:00<?, ?it/s]

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\models/xgb_model_Bjorli…

Uploading c:\Users\klara\Universidade\Year 2\Scalable ML and DL\Project\ID2223-Project\model_schema.json: 0.00…

Model created, explore it at https://eu-west.cloud.hopsworks.ai:443/p/2173/models/xgb_avalanche_model_95889948/2
