# Predict the bg+1:00 values

In [1]:
import os
import joblib
import pandas as pd

In [2]:
# Resolve the locations of both CSV files first, then read them in turn.
train_data_file = os.path.join('train_data.csv')
test_data_file = os.path.join('..', '..', '..', '..', 'data', 'raw', 'test.csv')

# Both files are read with identical options: the first CSV column becomes
# the row index, and low_memory=False turns off pandas' chunked parsing.
train_data, test_data = (
    pd.read_csv(csv_file, index_col=0, low_memory=False)
    for csv_file in (train_data_file, test_data_file)
)

In [3]:
# Preview the first five training rows (five is also the pandas default).
train_data.head(n=5)

Unnamed: 0,day_phase,p_num,bg+1:00,bg-0:00,bg-0:05,bg-0:10,bg-0:15,bg-0:20,bg-0:25,bg-0:30,...,insulin-1:15,insulin-1:20,insulin-1:25,insulin-1:30,insulin-1:35,insulin-1:40,insulin-1:45,insulin-1:50,insulin-1:55,insulin-2:00
p01_0,morning,p01,13.4,15.1,15.466667,15.833333,16.2,16.566667,16.933333,17.3,...,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583
p01_1,morning,p01,12.8,14.4,14.633333,14.866667,15.1,15.466667,15.833333,16.2,...,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583
p01_2,morning,p01,15.5,13.9,14.066667,14.233333,14.4,14.633333,14.866667,15.1,...,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583
p01_3,morning,p01,14.8,13.8,13.833333,13.866667,13.9,14.066667,14.233333,14.4,...,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583
p01_4,morning,p01,12.7,13.4,13.533333,13.666667,13.8,13.833333,13.866667,13.9,...,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583,0.0583


In [4]:
# Preview the first five test rows (five is also the pandas default).
test_data.head(n=5)

Unnamed: 0_level_0,p_num,time,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,bg-5:20,...,activity-0:45,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
p01_8459,p01,06:45:00,,9.2,,,10.2,,,10.3,...,,,,,,,,,,
p01_8460,p01,11:25:00,,,9.9,,,9.4,,,...,,,,,,,,Walk,Walk,Walk
p01_8461,p01,14:45:00,,5.5,,,5.5,,,5.2,...,,,,,,,,,,
p01_8462,p01,04:30:00,,3.4,,,3.9,,,4.7,...,,,,,,,,,,
p01_8463,p01,04:20:00,,,8.3,,,10.0,,,...,,,,,,,,,,


## Load and refit patient-specific models

In [5]:
# Distinct patient identifiers found in the training data's 'p_num' column,
# in order of first appearance.
patient_ids = pd.unique(train_data['p_num'])
patient_ids

array(['p01', 'p02', 'p03', 'p04', 'p05', 'p06', 'p10', 'p11', 'p12',
       'p15', 'p16', 'p18', 'p19', 'p21', 'p22', 'p24'], dtype=object)

In [6]:
from pipelines import standardization_pipeline

# Recomputed here so this cell only needs `train_data` to be defined.
patient_ids = train_data['p_num'].unique()

# Feature columns (names and order) the models are fitted on. It is recorded
# from the first patient and every later patient must match it, because the
# prediction step applies this single schema to every patient's test data.
fitted_columns = None

# Maps patient id -> estimator refitted on that patient's training rows.
specific_models = {}
for p_num in patient_ids:
    specific_model_file = f'XGBRegressor.{p_num}.model.pkl'
    print(f'Loading {specific_model_file}')
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code; only load model files that come from a trusted source.
    model = joblib.load(specific_model_file)

    print(f'Preparing data for {p_num}')
    patient_data = train_data[train_data['p_num'] == p_num]
    # Split this patient's rows into features and the 'bg+1:00' target.
    X = patient_data.drop(columns=['bg+1:00'])
    y = patient_data['bg+1:00']
    # The shared pipeline object is re-fitted on each patient's rows in turn.
    X = standardization_pipeline.fit_transform(X=X)
    # 'p_num' is constant within a patient, so it is not used as a feature.
    X = X.drop(columns=['p_num'])

    # Fail fast if the feature schema differs between patients instead of
    # silently keeping only the schema of whichever patient was fitted last.
    if fitted_columns is None:
        fitted_columns = X.columns
    elif not X.columns.equals(fitted_columns):
        raise ValueError(
            f'Feature columns for {p_num} differ from those of the first '
            f'patient; a single fitted_columns list cannot be shared.'
        )

    print(f'Fitting {specific_model_file}')
    # The loaded estimator is fitted again on this patient's training rows.
    model.fit(X=X, y=y)
    specific_models[p_num] = model

Loading XGBRegressor.p01.model.pkl
Preparing data for p01
Fitting XGBRegressor.p01.model.pkl
Loading XGBRegressor.p02.model.pkl
Preparing data for p02
Fitting XGBRegressor.p02.model.pkl
Loading XGBRegressor.p03.model.pkl
Preparing data for p03
Fitting XGBRegressor.p03.model.pkl
Loading XGBRegressor.p04.model.pkl
Preparing data for p04
Fitting XGBRegressor.p04.model.pkl
Loading XGBRegressor.p05.model.pkl
Preparing data for p05
Fitting XGBRegressor.p05.model.pkl
Loading XGBRegressor.p06.model.pkl
Preparing data for p06
Fitting XGBRegressor.p06.model.pkl
Loading XGBRegressor.p10.model.pkl
Preparing data for p10
Fitting XGBRegressor.p10.model.pkl
Loading XGBRegressor.p11.model.pkl
Preparing data for p11
Fitting XGBRegressor.p11.model.pkl
Loading XGBRegressor.p12.model.pkl
Preparing data for p12
Fitting XGBRegressor.p12.model.pkl
Loading XGBRegressor.p15.model.pkl
Preparing data for p15
Fitting XGBRegressor.p15.model.pkl
Loading XGBRegressor.p16.model.pkl
Preparing data for p16
Fitting XGBR

## Prepare test results

In [7]:
# One row per test id; every prediction starts at the -1.0 placeholder so
# rows that never get a prediction can be detected afterwards.
submission = pd.DataFrame({'bg+1:00': -1.0}, index=test_data.index.copy())

In [8]:
from pipelines import preprocessing_pipeline, standardization_pipeline, pipeline
# Predict the bg+1:00 values, one patient at a time, using that patient's model.
for p_num in patient_ids:
    print('----------------------------------------')
    print(f'Load model for {p_num}')
    model = specific_models[p_num]

    print(f'Prepare data for {p_num}')
    raw_test_data = test_data[test_data['p_num'] == p_num]
    if len(raw_test_data) == 0:
        # Nothing to predict for this patient, so skip before paying for
        # a pipeline fit on the training rows.
        continue

    # Fit the pipeline on this patient's training rows; only the fitted state
    # is needed here, the transformed training frame is discarded.
    raw_train_data = train_data[train_data['p_num'] == p_num]
    pipeline.fit_transform(raw_train_data)

    raw_test_data = pipeline.transform(raw_test_data)
    X = raw_test_data.drop(columns=['p_num'])
    # Prefer the feature names recorded on this patient's own fitted model;
    # fall back to the shared list when the estimator does not expose them.
    feature_columns = getattr(model, 'feature_names_in_', fitted_columns)
    X = X[feature_columns]

    print(f'Predict for {p_num}')
    prediction = model.predict(X=X)
    # Assign by row label so each prediction lands on its own test id even if
    # the pipeline changed the row order (a boolean mask would pair values by
    # the submission's order instead).
    submission.loc[raw_test_data.index, 'bg+1:00'] = prediction

----------------------------------------
Load model for p01
Prepare data for p01
Predict for p01
----------------------------------------
Load model for p02
Prepare data for p02
Predict for p02
----------------------------------------
Load model for p03
Prepare data for p03
----------------------------------------
Load model for p04
Prepare data for p04
Predict for p04
----------------------------------------
Load model for p05
Prepare data for p05
Predict for p05
----------------------------------------
Load model for p06
Prepare data for p06
Predict for p06
----------------------------------------
Load model for p10
Prepare data for p10
Predict for p10
----------------------------------------
Load model for p11
Prepare data for p11
Predict for p11
----------------------------------------
Load model for p12
Prepare data for p12
Predict for p12
----------------------------------------
Load model for p15
Prepare data for p15
Predict for p15
----------------------------------------
Load 

## Prepare the submission file

### Save the submission file

In [9]:
# Rows still equal to -1.0 in 'bg+1:00' are listed here; an empty frame
# means no row holds that value.
is_unfilled = submission['bg+1:00'].eq(-1.0)
missing_predictions = submission.loc[is_unfilled]
missing_predictions

Unnamed: 0_level_0,bg+1:00
id,Unnamed: 1_level_1


In [10]:
# The output file is named after the current working directory, so each
# experiment folder produces its own distinctly named submission CSV.
run_name = os.path.basename(os.getcwd())
submission.to_csv(f'submission-{run_name}.csv')
submission

Unnamed: 0_level_0,bg+1:00
id,Unnamed: 1_level_1
p01_8459,7.963037
p01_8460,5.203694
p01_8461,8.303721
p01_8462,10.321416
p01_8463,6.243777
...,...
p24_256,6.340204
p24_257,10.331060
p24_258,6.544921
p24_259,8.676013
