# Modelling Selected Parameters at 0:00 + Day Phase

## XGBRegressor - Hyperparameter Tuning

In [1]:
import pandas as pd
import os

Unnamed: 0,bg-0:00,insulin-0:00,hr-0:00,cals-0:00,bg+1:00,day_phase_evening,day_phase_morning,day_phase_night,day_phase_noon
0,2.275154,-0.152482,0.929993,-0.458394,13.4,False,True,False,False
1,2.041687,-0.152482,0.929993,-0.458394,12.8,False,True,False,False
2,1.874925,-0.152482,0.929993,-0.458394,15.5,False,True,False,False
3,1.841572,-0.152482,0.929993,-0.458394,14.8,False,True,False,False
4,1.708162,-0.152482,0.929993,-0.458394,12.7,False,True,False,False


In [None]:
# Read the raw training CSV and push it through the project's preprocessing
# pipeline (imported from the local `pipelines` module).
from pipelines import pipeline

# The raw data lives four directory levels above this notebook.
data_file = os.path.join(*(['..'] * 4), 'data', 'raw', 'train.csv')

# The first CSV column is used as the index; low_memory=False makes pandas
# parse the file in one pass instead of in chunks.
train_data = pipeline.fit_transform(
    pd.read_csv(data_file, index_col=0, low_memory=False)
)

# Preview the first rows of the transformed frame
train_data.head()

In [2]:
# Target: the 'bg+1:00' column; features: every remaining column.
y = train_data['bg+1:00']
X = train_data.drop('bg+1:00', axis=1)

In [3]:
import numpy as np
from skopt import BayesSearchCV
from skopt.space import Integer, Real
from xgboost import XGBRegressor

# Hyperparameter ranges handed to the Bayesian search.
param_space = dict(
    n_estimators=Integer(50, 500),
    learning_rate=Real(0.01, 1, prior='log-uniform'),
    max_depth=Integer(3, 20),
    subsample=Real(0.5, 1.0),
    colsample_bytree=Real(0.5, 1.0),
    gamma=Real(0, 30),
    min_child_weight=Integer(1, 10),
)

# Base estimator: squared-error objective, histogram tree method, fixed seed.
xgb_regressor = XGBRegressor(
    objective='reg:squarederror',
    tree_method='hist',
    random_state=42,
)

# 30 search iterations, each scored by 5-fold cross-validated negative MSE,
# with parallelism across all available cores (n_jobs=-1).
opt = BayesSearchCV(
    estimator=xgb_regressor,
    search_spaces=param_space,
    scoring='neg_mean_squared_error',
    n_iter=30,
    cv=5,
    n_jobs=-1,
    random_state=42,
)

# Compatibility shim: re-create the `np.int` alias before fitting.
# NOTE(review): presumably needed because the installed scikit-optimize still
# references `np.int`, which recent NumPy releases removed - confirm and drop
# once the dependency is upgraded.
np.int = int
opt.fit(X, y)

# Pull out the winning configuration and its cross-validated score.
# The score is a *negative* MSE (see `scoring` above), so values closer to
# zero are better.
best_params, best_score = opt.best_params_, opt.best_score_

print("Best parameters found: ", best_params)
print("Best score: ", best_score)

Best parameters found:  OrderedDict({'colsample_bytree': 1.0, 'gamma': 28.76664765696484, 'learning_rate': 0.10288409106897808, 'max_depth': 3, 'min_child_weight': 10, 'n_estimators': 50, 'subsample': 1.0})
Best score:  -4.5584569824890355


In [4]:
# Save the tuned model found by the Bayesian search
import joblib

# `opt.best_estimator_` is the estimator the search refit on the full training
# data with the best parameters (refit=True is the default). A fresh
# `XGBRegressor(**best_params)` would be pickled *unfitted* and would silently
# lose the base settings used during tuning (objective, random_state=42,
# tree_method='hist'), so the saved file could neither predict nor reproduce
# the tuned configuration.
xgb = opt.best_estimator_
joblib.dump(xgb, 'xgb.model.pkl')

['xgb.model.pkl']