# Modelling Selected Parameters at 0:00 + Day Phase

## HistGradientBoostingRegressor - Hyperparameter Tuning

In [1]:
import pandas as pd

df = pd.read_csv('train_data.csv')
df.head()

Unnamed: 0,bg,insulin,hr,cals,bg+1:00,day_phase_evening,day_phase_morning,day_phase_night,day_phase_noon
0,2.275461,-0.15248,-0.118165,-0.353413,13.4,False,True,False,False
1,2.041896,-0.15248,-0.118165,-0.353413,12.8,False,True,False,False
2,1.875063,-0.15248,-0.118165,-0.353413,15.5,False,True,False,False
3,1.841697,-0.15248,-0.118165,-0.353413,14.8,False,True,False,False
4,1.708231,-0.15248,-0.118165,-0.353413,12.7,False,True,False,False


In [2]:
X = df.drop(columns=['bg+1:00'])
y = df['bg+1:00']

In [3]:
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import GridSearchCV, KFold

param_grid = {
    'learning_rate': [0.01, 0.05, 0.1],
    'max_iter': [100, 200, 300],
    'max_leaf_nodes': [31, 63, 127],
    'max_depth': [3, 5, 7],
    'min_samples_leaf': [20, 50, 100],
    'l2_regularization': [0.0, 0.1, 0.5],
    'max_bins': [255, 511]
}

kf = KFold(n_splits=5, shuffle=True, random_state=42)
hgb = HistGradientBoostingRegressor()
grid_search = GridSearchCV(estimator=hgb, param_grid=param_grid, cv=kf, scoring='neg_mean_squared_error', n_jobs=-1)

In [4]:
# Fit the model
grid_search.fit(X=X, y=y)

# Get the best parameters and best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best parameters found: ", best_params)
print("Best score: ", best_score)

import warnings

warnings.filterwarnings('ignore')

3645 fits failed out of a total of 7290.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
3645 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/ralf/Projects/learning-projects/datascience-bootcamp/sep24_bds_int_medical/.venv/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/ralf/Projects/learning-projects/datascience-bootcamp/sep24_bds_int_medical/.venv/lib/python3.12/site-packages/sklearn/base.py", line 1466, in wrapper
    estimator._validate_params()
  File "/Users/ralf/Projects/learning-projects/datascience-bootcamp/sep24_bds_int_medical/.venv/lib/python3.12/site-packages/sklearn/base.

Best parameters found:  {'l2_regularization': 0.5, 'learning_rate': 0.1, 'max_bins': 255, 'max_depth': 7, 'max_iter': 300, 'max_leaf_nodes': 63, 'min_samples_leaf': 20}
Best score:  -3.9820805745335037


In [5]:
# train and save model
hgb = HistGradientBoostingRegressor(**best_params)

In [6]:
# save model
import joblib

joblib.dump(hgb, 'hgb.model.pkl')

['hgb.model.pkl']