# Modelling Selected Parameters at 0:00 + Day Phase

## XGBRegressor - Hyperparameter Tuning

In [1]:
import pandas as pd

# Read the prepared training table from the working directory and preview
# the first five rows to sanity-check the columns.
df = pd.read_csv(filepath_or_buffer='train_data.csv')
df.head(n=5)

Unnamed: 0,bg,insulin,hr,cals,bg+1:00,day_phase_evening,day_phase_morning,day_phase_night,day_phase_noon
0,2.275461,-0.15248,-0.118165,-0.353413,13.4,False,True,False,False
1,2.041896,-0.15248,-0.118165,-0.353413,12.8,False,True,False,False
2,1.875063,-0.15248,-0.118165,-0.353413,15.5,False,True,False,False
3,1.841697,-0.15248,-0.118165,-0.353413,14.8,False,True,False,False
4,1.708231,-0.15248,-0.118165,-0.353413,12.7,False,True,False,False


In [2]:
# Split the frame into predictors (X) and the 'bg+1:00' target (y).
#
# 'Unnamed: 0' is excluded from the predictors: in df.head() it is just a
# running row counter (0, 1, 2, ...), which looks like the DataFrame index
# that was written out together with the CSV rather than a measured signal.
# Leaving it in would let the model split on row position.
# errors='ignore' keeps the cell working if the CSV is ever loaded without
# that column (e.g. with index_col=0); a missing target still raises a
# KeyError on the y assignment below.
X = df.drop(columns=['bg+1:00', 'Unnamed: 0'], errors='ignore')
y = df['bg+1:00']

In [3]:
from sklearn.model_selection import GridSearchCV, KFold
from xgboost import XGBRegressor

# Candidate values per hyperparameter; the grid search evaluates every
# combination of them.
param_grid = dict(
    n_estimators=[100, 200],
    learning_rate=[0.01, 0.1, 0.5, 1],
    max_depth=[3, 7],
    subsample=[0.8, 1.0],
    colsample_bytree=[0.8, 1.0],
    gamma=[0, 0.25, 0.5],
)

# Shuffled 5-fold split with a fixed seed so the fold assignment is the same
# on every run.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
xgb = XGBRegressor()

# Scoring is the negated mean squared error, so "best" means closest to zero.
# n_jobs=-1 lets scikit-learn run the candidate fits on all available cores.
grid_search = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=kf,
    n_jobs=-1,
)
grid_search.fit(X=X, y=y)

# Winning parameter combination and its mean cross-validated score.
best_params, best_score = grid_search.best_params_, grid_search.best_score_

print("Best parameters found: ", best_params)
print("Best score: ", best_score)

Best parameters found:  {'colsample_bytree': 1.0, 'gamma': 0.5, 'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 200, 'subsample': 0.8}
Best score:  -3.967959245637135


In [6]:
# Create the model with the best parameters, train it, and save it.
import joblib

# A freshly constructed XGBRegressor only carries hyperparameters; it has no
# trained booster. Fit it on the full training data before pickling so the
# saved file holds a usable model instead of one that fails on predict().
# Anyone who loads it can still call fit() again to retrain from scratch.
xgb = XGBRegressor(**best_params)
xgb.fit(X, y)
joblib.dump(xgb, 'xgb.model.pkl')

['xgb.model.pkl']