# Modelling Selected Parameters at 0:00 + Day Phase

## Predicting the blood glucose levels at 1:00

In [1]:
import os
import joblib
import pandas as pd

stored_models = [file for file in os.listdir() if file.endswith('.model.pkl')]
stored_models

['ridge.model.pkl', 'xgb.model.pkl', 'hgb.model.pkl']

In [2]:
from sklearn.ensemble import VotingRegressor

estimators = []
for model_file in stored_models:
    print(f'Loading {model_file}')
    model = joblib.load(model_file)
    estimators.append((
        model_file.replace('.model.pkl', ''),
        model
    ))

estimators

Loading ridge.model.pkl
Loading xgb.model.pkl
Loading hgb.model.pkl


[('ridge', RidgeCV(alpha_per_target=True, alphas=[0.1, 1.0, 10.0])),
 ('xgb',
  XGBRegressor(base_score=None, booster=None, callbacks=None,
               colsample_bylevel=None, colsample_bynode=None,
               colsample_bytree=1.0, device=None, early_stopping_rounds=None,
               enable_categorical=False, eval_metric=None, feature_types=None,
               gamma=28.76664765696484, grow_policy=None, importance_type=None,
               interaction_constraints=None, learning_rate=0.10288409106897808,
               max_bin=None, max_cat_threshold=None, max_cat_to_onehot=None,
               max_delta_step=None, max_depth=3, max_leaves=None,
               min_child_weight=10, missing=nan, monotone_constraints=None,
               multi_strategy=None, n_estimators=50, n_jobs=None,
               num_parallel_tree=None, random_state=None, ...)),
 ('hgb',
  HistGradientBoostingRegressor(l2_regularization=0.1402357937318292,
                                learning_rate=0.0254

In [3]:
# Load, preprocess and standardize the train data 
from pipelines import pipeline

data_file = os.path.join('..', '..', '..', '..', 'data', 'raw', 'train.csv')
train_data = pd.read_csv(data_file, index_col=0, low_memory=False)
train_data = pipeline.fit_transform(train_data)

In [4]:
# Split the data into X and y
X_train = train_data.drop(columns=['bg+1:00'])
y_train = train_data['bg+1:00']

In [5]:
# Create anf train the ensemble model
ensemble = VotingRegressor(estimators=estimators, verbose=True)
ensemble.fit(X=X_train, y=y_train)

[Voting] .................... (1 of 3) Processing ridge, total=   0.1s
[Voting] ...................... (2 of 3) Processing xgb, total=   0.3s
[Voting] ...................... (3 of 3) Processing hgb, total=   2.1s


In [6]:
from sklearn.metrics import root_mean_squared_error, r2_score

y_pred = ensemble.predict(X_train)
print(f'RMSE: {root_mean_squared_error(y_train, y_pred)}')
print(f'R2: {r2_score(y_train, y_pred)}')

RMSE: 2.0989234213573074
R2: 0.5093220475799609


# Prepare test data

In [7]:
## Load the test data, preprocess and standardize it
test_file = os.path.join('..', '..', '..', '..', 'data', 'raw', 'test.csv')
test_data = pd.read_csv(test_file, index_col=0)
test_data = pipeline.transform(X=test_data)
test_data.head()

Unnamed: 0_level_0,bg-0:00,insulin-0:00,hr-0:00,cals-0:00,day_phase_evening,day_phase_morning,day_phase_night,day_phase_noon
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
p01_8459,0.440769,-0.152482,-1.369696,-0.293115,False,True,False,False
p01_8460,-1.226853,-0.135781,1.908258,3.165936,False,False,False,True
p01_8461,-0.09287,-0.144131,-0.253143,0.244698,False,False,False,False
p01_8462,0.540827,-0.135781,-1.103362,-0.458394,False,False,True,False
p01_8463,-0.993386,-0.194436,-1.047022,-0.458394,False,False,True,False


In [8]:
# Predict the bg+1:00 values
test_data['bg+1:00'] = ensemble.predict(test_data)
test_data.head()

Unnamed: 0_level_0,bg-0:00,insulin-0:00,hr-0:00,cals-0:00,day_phase_evening,day_phase_morning,day_phase_night,day_phase_noon,bg+1:00
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
p01_8459,0.440769,-0.152482,-1.369696,-0.293115,False,True,False,False,9.079422
p01_8460,-1.226853,-0.135781,1.908258,3.165936,False,False,False,True,6.118474
p01_8461,-0.09287,-0.144131,-0.253143,0.244698,False,False,False,False,7.721903
p01_8462,0.540827,-0.135781,-1.103362,-0.458394,False,False,True,False,9.105613
p01_8463,-0.993386,-0.194436,-1.047022,-0.458394,False,False,True,False,6.072016


## Prepare the submission file

In [9]:
submission = pd.DataFrame(test_data['bg+1:00'])
submission

Unnamed: 0_level_0,bg+1:00
id,Unnamed: 1_level_1
p01_8459,9.079422
p01_8460,6.118474
p01_8461,7.721903
p01_8462,9.105613
p01_8463,6.072016
...,...
p24_256,7.036430
p24_257,9.335601
p24_258,7.389974
p24_259,8.578509


### Save the submission file

In [10]:
submission.to_csv(f'submission-{os.path.basename(os.getcwd())}.csv')