# Predict the bg+1:00 values

In [1]:
import os
import joblib
import numpy as np
import pandas as pd

In [2]:
# Load the raw training set and the interim `all_test_2h.csv` set, then stack
# them row-wise into a single training frame.
# NOTE(review): only the raw file is read with `index_col=0`; the interim file
# keeps pandas' default index, so the combined index mixes both kinds of
# labels -- confirm this is intended.
train_data_file = os.path.join(
    os.pardir, os.pardir, os.pardir, os.pardir, 'data', 'raw', 'train.csv'
)
extra_train_data_file = os.path.join(
    os.pardir, os.pardir, os.pardir, os.pardir, 'data', 'interim', 'all_test_2h.csv'
)

train_data_tmp = pd.read_csv(train_data_file, low_memory=False, index_col=0)
extra_train_data = pd.read_csv(extra_train_data_file, low_memory=False)

all_train_data = pd.concat([train_data_tmp, extra_train_data])
all_train_data.head()

Unnamed: 0,p_num,time,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,bg-5:20,...,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00,bg+1:00
p01_0,p01,06:10:00,,,9.6,,,9.7,,,...,,,,,,,,,,13.4
p01_1,p01,06:25:00,,,9.7,,,9.2,,,...,,,,,,,,,,12.8
p01_2,p01,06:40:00,,,9.2,,,8.7,,,...,,,,,,,,,,15.5
p01_3,p01,06:55:00,,,8.7,,,8.4,,,...,,,,,,,,,,14.8
p01_4,p01,07:10:00,,,8.4,,,8.1,,,...,,,,,,,,,,12.7


In [3]:
# Read the raw test set; the first CSV column becomes the index.
test_data_file = os.path.join(
    os.pardir, os.pardir, os.pardir, os.pardir, 'data', 'raw', 'test.csv'
)
all_test_data = pd.read_csv(test_data_file, low_memory=False, index_col=0)

## Load patient-specific models

In [4]:
# Distinct patient identifiers found in the training data (order of appearance).
patient_ids = pd.unique(all_train_data['p_num'])
patient_ids

array(['p01', 'p02', 'p03', 'p04', 'p05', 'p06', 'p10', 'p11', 'p12',
       'p15', 'p16', 'p18', 'p19', 'p21', 'p22', 'p24'], dtype=object)

## Prepare test results

In [5]:
# One row per test id, pre-filled with the sentinel -1.0 so that rows which
# never receive a prediction can be spotted afterwards.
submission = pd.DataFrame(index=all_test_data.index.copy()).assign(**{'bg+1:00': -1.0})

In [6]:
from pipelines import pipeline

# Refit one saved model per patient on that patient's training rows and keep
# the fitted models in `specific_models`, keyed by patient id.
patient_ids = all_train_data['p_num'].unique()
fitted_columns = []

specific_models = {}
for p_num in patient_ids:
    specific_model_file = f'BaggingPolynomialFeaturesRidgeCV.{p_num}.model.pkl'
    print(f'Loading {specific_model_file}')
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code -- only load model files from a trusted source.
    model = joblib.load(specific_model_file)

    print(f'Preparing data for {p_num}')
    # Keep only this patient's rows; `p_num` is dropped before preprocessing.
    train_data = all_train_data[all_train_data['p_num'] == p_num]
    train_data = train_data.drop(columns=['p_num'])
    train_data = pipeline.fit_transform(train_data)

    # The target is fitted in log1p space.
    X = train_data.drop(columns=['bg+1:00'])
    y = np.log1p(train_data['bg+1:00'])

    print(f'Fitting {specific_model_file}')
    # Overwritten on every iteration: after the loop this holds the feature
    # columns of the last patient processed.
    fitted_columns = X.columns
    model.fit(X=X, y=y)
    specific_models[p_num] = model

Loading BaggingPolynomialFeaturesRidgeCV.p01.model.pkl
Preparing data for p01
Fitting BaggingPolynomialFeaturesRidgeCV.p01.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  3.4min remaining: 10.1min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  4.1min finished


Loading BaggingPolynomialFeaturesRidgeCV.p02.model.pkl
Preparing data for p02
Fitting BaggingPolynomialFeaturesRidgeCV.p02.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  9.6min remaining: 28.7min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed: 10.8min finished


Loading BaggingPolynomialFeaturesRidgeCV.p03.model.pkl
Preparing data for p03
Fitting BaggingPolynomialFeaturesRidgeCV.p03.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  5.2min remaining: 15.6min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  6.3min finished


Loading BaggingPolynomialFeaturesRidgeCV.p04.model.pkl
Preparing data for p04
Fitting BaggingPolynomialFeaturesRidgeCV.p04.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  9.6min remaining: 28.9min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed: 11.1min finished


Loading BaggingPolynomialFeaturesRidgeCV.p05.model.pkl
Preparing data for p05
Fitting BaggingPolynomialFeaturesRidgeCV.p05.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  3.6min remaining: 10.9min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  4.4min finished


Loading BaggingPolynomialFeaturesRidgeCV.p06.model.pkl
Preparing data for p06
Fitting BaggingPolynomialFeaturesRidgeCV.p06.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  3.3min remaining:  9.9min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  4.0min finished


Loading BaggingPolynomialFeaturesRidgeCV.p10.model.pkl
Preparing data for p10
Fitting BaggingPolynomialFeaturesRidgeCV.p10.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  5.1min remaining: 15.3min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  6.9min finished


Loading BaggingPolynomialFeaturesRidgeCV.p11.model.pkl
Preparing data for p11
Fitting BaggingPolynomialFeaturesRidgeCV.p11.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  6.3min remaining: 18.8min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  7.8min finished


Loading BaggingPolynomialFeaturesRidgeCV.p12.model.pkl
Preparing data for p12
Fitting BaggingPolynomialFeaturesRidgeCV.p12.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  9.3min remaining: 27.9min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed: 11.0min finished


Loading BaggingPolynomialFeaturesRidgeCV.p15.model.pkl
Preparing data for p15
Fitting BaggingPolynomialFeaturesRidgeCV.p15.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  2.3min remaining:  6.9min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  2.9min finished


Loading BaggingPolynomialFeaturesRidgeCV.p16.model.pkl
Preparing data for p16
Fitting BaggingPolynomialFeaturesRidgeCV.p16.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  2.0min remaining:  6.1min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  2.6min finished


Loading BaggingPolynomialFeaturesRidgeCV.p18.model.pkl
Preparing data for p18
Fitting BaggingPolynomialFeaturesRidgeCV.p18.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  1.9min remaining:  5.8min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  2.5min finished


Loading BaggingPolynomialFeaturesRidgeCV.p19.model.pkl
Preparing data for p19
Fitting BaggingPolynomialFeaturesRidgeCV.p19.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  2.0min remaining:  6.1min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  2.6min finished


Loading BaggingPolynomialFeaturesRidgeCV.p21.model.pkl
Preparing data for p21
Fitting BaggingPolynomialFeaturesRidgeCV.p21.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  2.1min remaining:  6.3min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  2.7min finished


Loading BaggingPolynomialFeaturesRidgeCV.p22.model.pkl
Preparing data for p22
Fitting BaggingPolynomialFeaturesRidgeCV.p22.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  1.9min remaining:  5.7min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  2.4min finished


Loading BaggingPolynomialFeaturesRidgeCV.p24.model.pkl
Preparing data for p24
Fitting BaggingPolynomialFeaturesRidgeCV.p24.model.pkl


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  2.1min remaining:  6.4min
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  2.7min finished


In [7]:
from pipelines import pipeline

# Predict the bg+1:00 values, one patient at a time, and write them into
# `submission`.
for p_num in patient_ids:
    print('----------------------------------------')
    print(f'Load model for {p_num}')
    model = specific_models[p_num]

    print(f'Prepare data for {p_num}')
    # Look for test rows first so the preprocessing pipeline is not refitted
    # for patients that have nothing to predict.
    test_data = all_test_data[all_test_data['p_num'] == p_num]
    if len(test_data) == 0:
        continue

    # Refit the preprocessing pipeline on this patient's training rows with
    # `p_num` dropped, i.e. on the same columns the model was fitted on, and
    # apply it to the test rows prepared the same way.
    train_data = all_train_data[all_train_data['p_num'] == p_num]
    pipeline.fit_transform(train_data.drop(columns=['p_num']))
    test_data = pipeline.transform(test_data.drop(columns=['p_num']))

    # Prefer the feature names recorded on the fitted model itself; fall back
    # to the notebook-level `fitted_columns` when the model does not expose them.
    feature_columns = getattr(model, 'feature_names_in_', fitted_columns)
    X = test_data[feature_columns]

    print(f'Predict for {p_num}')
    # expm1 inverts the log1p transform applied to the target when fitting.
    prediction = np.expm1(model.predict(X=X))
    # Assign by index label so each prediction lands on its own row regardless
    # of row order.
    submission.loc[X.index, 'bg+1:00'] = prediction

----------------------------------------
Load model for p01
Prepare data for p01
Predict for p01
----------------------------------------
Load model for p02
Prepare data for p02


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p02
----------------------------------------
Load model for p03
Prepare data for p03


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


----------------------------------------
Load model for p04
Prepare data for p04
Predict for p04
----------------------------------------
Load model for p05
Prepare data for p05


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p05
----------------------------------------
Load model for p06
Prepare data for p06


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p06
----------------------------------------
Load model for p10
Prepare data for p10


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p10
----------------------------------------
Load model for p11
Prepare data for p11


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p11
----------------------------------------
Load model for p12
Prepare data for p12


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p12
----------------------------------------
Load model for p15
Prepare data for p15


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p15
----------------------------------------
Load model for p16
Prepare data for p16


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p16
----------------------------------------
Load model for p18
Prepare data for p18


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p18
----------------------------------------
Load model for p19
Prepare data for p19


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p19
----------------------------------------
Load model for p21
Prepare data for p21


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p21
----------------------------------------
Load model for p22
Prepare data for p22


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p22
----------------------------------------
Load model for p24
Prepare data for p24


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


Predict for p24


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.0s finished


## Prepare the submission file

### Check for missing predictions and save the submission file

In [8]:
# Rows still holding the -1.0 sentinel never received a prediction;
# an empty result means every test row was filled in.
missing_predictions = submission.loc[submission['bg+1:00'].eq(-1.0)]
missing_predictions

Unnamed: 0_level_0,bg+1:00
id,Unnamed: 1_level_1


In [9]:
# Name the CSV after the current working directory, write it, then display
# the frame.
submission_file = f'submission-{os.path.basename(os.getcwd())}.csv'
submission.to_csv(submission_file)
submission

Unnamed: 0_level_0,bg+1:00
id,Unnamed: 1_level_1
p01_8459,8.367717
p01_8460,4.995464
p01_8461,9.116430
p01_8462,12.423482
p01_8463,7.668889
...,...
p24_256,6.688516
p24_257,14.335296
p24_258,6.405231
p24_259,7.913255
