# Modelling Selected Parameters at 0:00 + Day Phase

## RidgeCV - Hyperparameter Tuning

In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, KFold

# Read the training set from disk and preview its first rows.
train_csv = 'train_data.csv'
df = pd.read_csv(train_csv)
df.head()

Unnamed: 0,bg-0:00,insulin-0:00,hr-0:00,cals-0:00,bg+1:00,day_phase_evening,day_phase_morning,day_phase_night,day_phase_noon
0,2.275154,-0.152482,0.929993,-0.458394,13.4,False,True,False,False
1,2.041687,-0.152482,0.929993,-0.458394,12.8,False,True,False,False
2,1.874925,-0.152482,0.929993,-0.458394,15.5,False,True,False,False
3,1.841572,-0.152482,0.929993,-0.458394,14.8,False,True,False,False
4,1.708162,-0.152482,0.929993,-0.458394,12.7,False,True,False,False


In [2]:
# Split the frame into features (X) and the prediction target (y).
#
# 'Unnamed: 0' appears to be the row counter that was written into the CSV
# (df.head() shows 0, 1, 2, ...), not a measurement. Leaving it in X would let
# the model fit on row order, so it is excluded from the features.
# errors='ignore' keeps this cell working if the CSV is re-exported without
# that column; the target column is still required to exist.
X = df.drop(columns=['bg+1:00']).drop(columns=['Unnamed: 0'], errors='ignore')
y = df['bg+1:00']

In [3]:
from sklearn.linear_model import RidgeCV

# Search space for the outer grid search. Every entry of 'alphas' is itself a
# list of candidate regularisation strengths handed to RidgeCV as one setting.
param_grid = dict(
    alpha_per_target=[True, False],
    alphas=[
        [0.1, 1.0, 10.0],
        [0.1, 0.5, 1.0],
        [0.1, 0.5, 1.0, 5.0],
        [0.1, 0.5, 1.0, 5.0, 10.0],
    ],
    fit_intercept=[True, False],
)

# Base estimator and a seeded, shuffled 5-fold splitter so reruns are repeatable.
ridge = RidgeCV()
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Score every combination by negative MSE, using all available cores.
grid_search = GridSearchCV(
    estimator=ridge,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=kf,
    n_jobs=-1,
)
grid_search.fit(X, y)

# Keep the winning configuration and its mean cross-validated score.
best_params, best_score = grid_search.best_params_, grid_search.best_score_

print("Best parameters found: ", best_params)
print("Best score: ", best_score)

Best parameters found:  {'alpha_per_target': True, 'alphas': [0.1, 1.0, 10.0], 'fit_intercept': True}
Best score:  -4.650405998790541


In [4]:
# Save the fitted model that uses the best parameters.
import joblib

# best_estimator_ is the RidgeCV that GridSearchCV refit on all of X, y with
# best_params (refit=True is the GridSearchCV default). Building a fresh
# RidgeCV(**best_params) without calling fit would pickle an estimator that
# has no coefficients and cannot predict after loading.
ridge = grid_search.best_estimator_
joblib.dump(ridge, 'ridge.model.pkl')

['ridge.model.pkl']