# Predict the bg+1:00 values

In [1]:
import os
import joblib
import numpy as np
import pandas as pd

In [2]:
# Load train and test data
train_data_file = os.path.join('..', '..', '..', '..', 'data', 'raw', 'train.csv')
train_data_tmp = pd.read_csv(train_data_file, index_col=0, low_memory=False)
extra_train_data_file = os.path.join('..', '..', '..', '..', 'data', 'interim', 'all_test_2h.csv')
extra_train_data = pd.read_csv(extra_train_data_file, low_memory=False)

all_train_data = pd.concat([train_data_tmp, extra_train_data], axis=0)
all_train_data.head()

Unnamed: 0,p_num,time,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,bg-5:20,...,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00,bg+1:00
p01_0,p01,06:10:00,,,9.6,,,9.7,,,...,,,,,,,,,,13.4
p01_1,p01,06:25:00,,,9.7,,,9.2,,,...,,,,,,,,,,12.8
p01_2,p01,06:40:00,,,9.2,,,8.7,,,...,,,,,,,,,,15.5
p01_3,p01,06:55:00,,,8.7,,,8.4,,,...,,,,,,,,,,14.8
p01_4,p01,07:10:00,,,8.4,,,8.1,,,...,,,,,,,,,,12.7


In [3]:
test_data_file = os.path.join('..', '..', '..', '..', 'data', 'raw', 'test.csv')
test_data = pd.read_csv(test_data_file, index_col=0, low_memory=False)

## Load patient-specific models

In [4]:
patient_ids = all_train_data['p_num'].unique()
patient_ids

array(['p01', 'p02', 'p03', 'p04', 'p05', 'p06', 'p10', 'p11', 'p12',
       'p15', 'p16', 'p18', 'p19', 'p21', 'p22', 'p24'], dtype=object)

# Prepare test results

In [5]:
submission = pd.DataFrame(index=test_data.index.copy())
submission['bg+1:00'] = -1.0

In [6]:
from pipelines import pipeline

patient_ids = all_train_data['p_num'].unique()
fitted_columns = []

specific_models = {}
for p_num in patient_ids:
    specific_model_file = f'XGBRegressor.{p_num}.model.pkl'
    print(f'Loading {specific_model_file}')
    model = joblib.load(specific_model_file)
    model_name = specific_model_file.replace('.model.pkl', '')

    print(f'Preparing data for {p_num}')
    patient_data = all_train_data[all_train_data['p_num'] == p_num]
    patient_data = patient_data.drop(columns=['p_num'])
    patient_data = pipeline.fit_transform(patient_data)
    X = patient_data.drop(columns=['bg+1:00'])
    y = np.log1p(patient_data['bg+1:00'])

    print(f'Fitting {specific_model_file}')
    fitted_columns = X.columns
    model.fit(X=X, y=y)
    specific_models[p_num] = model

Loading XGBRegressor.p01.model.pkl
Preparing data for p01
Fitting XGBRegressor.p01.model.pkl
Loading XGBRegressor.p02.model.pkl
Preparing data for p02
Fitting XGBRegressor.p02.model.pkl
Loading XGBRegressor.p03.model.pkl
Preparing data for p03
Fitting XGBRegressor.p03.model.pkl
Loading XGBRegressor.p04.model.pkl
Preparing data for p04
Fitting XGBRegressor.p04.model.pkl
Loading XGBRegressor.p05.model.pkl
Preparing data for p05
Fitting XGBRegressor.p05.model.pkl
Loading XGBRegressor.p06.model.pkl
Preparing data for p06
Fitting XGBRegressor.p06.model.pkl
Loading XGBRegressor.p10.model.pkl
Preparing data for p10
Fitting XGBRegressor.p10.model.pkl
Loading XGBRegressor.p11.model.pkl
Preparing data for p11
Fitting XGBRegressor.p11.model.pkl
Loading XGBRegressor.p12.model.pkl
Preparing data for p12
Fitting XGBRegressor.p12.model.pkl
Loading XGBRegressor.p15.model.pkl
Preparing data for p15
Fitting XGBRegressor.p15.model.pkl
Loading XGBRegressor.p16.model.pkl
Preparing data for p16
Fitting XGBR

In [7]:
from pipelines import pipeline

# Predict the bg+1:00 values
for p_num in patient_ids:
    print(f'----------------------------------------')
    print(f'Load model for {p_num}')
    model = specific_models[p_num]

    print(f'Prepare data for {p_num}')
    raw_train_data = all_train_data[all_train_data['p_num'] == p_num]
    pipeline.fit_transform(raw_train_data)
    raw_test_data = test_data[test_data['p_num'] == p_num]
    if len(raw_test_data) == 0:
        continue
    raw_test_data = pipeline.transform(raw_test_data)
    X = raw_test_data.drop(columns=['p_num'])
    X = X[fitted_columns]

    print(f'Predict for {p_num}')
    prediction = np.expm1(model.predict(X=X))
    submission.loc[submission.index.isin(raw_test_data.index), 'bg+1:00'] = prediction

----------------------------------------
Load model for p01
Prepare data for p01
17392
Predict for p01
----------------------------------------
Load model for p02
Prepare data for p02
34119
Predict for p02
----------------------------------------
Load model for p03
Prepare data for p03
26028
----------------------------------------
Load model for p04
Prepare data for p04
34074
Predict for p04
----------------------------------------
Load model for p05
Prepare data for p05
18479
Predict for p05
----------------------------------------
Load model for p06
Prepare data for p06
17039
Predict for p06
----------------------------------------
Load model for p10
Prepare data for p10
32064
Predict for p10
----------------------------------------
Load model for p11
Prepare data for p11
32642
Predict for p11
----------------------------------------
Load model for p12
Prepare data for p12
35903
Predict for p12
----------------------------------------
Load model for p15
Prepare data for p15
10748
Pr

## Prepare the submission file

### Save the submission file

In [8]:
# check if there are missing predictions in the submission
missing_predictions = submission[submission['bg+1:00'] == -1.0]
missing_predictions

Unnamed: 0_level_0,bg+1:00
id,Unnamed: 1_level_1


In [9]:
submission.to_csv(f'submission-{os.path.basename(os.getcwd())}.csv')
submission

Unnamed: 0_level_0,bg+1:00
id,Unnamed: 1_level_1
p01_8459,8.134215
p01_8460,6.601960
p01_8461,8.145835
p01_8462,10.968028
p01_8463,7.637263
...,...
p24_256,6.366510
p24_257,8.838026
p24_258,6.437591
p24_259,7.686871
