# Modelling

In [1]:
import os
import pandas as pd

In [2]:
# Both input files sit under the same data directory, four levels up from the
# working directory; build that prefix once and derive each file path from it.
data_dir = os.path.join('..', '..', '..', '..', 'data')
train_data_file = os.path.join(data_dir, 'raw', 'train.csv')
extra_train_data_file = os.path.join(data_dir, 'interim', 'all_test_2h.csv')

# The raw training file takes its index from its first column; the interim
# file is read with pandas' default index.
# NOTE(review): because only the first read passes index_col=0, the combined
# frame carries two different index schemes — confirm this is intended.
train_data = pd.read_csv(train_data_file, index_col=0, low_memory=False)
extra_train_data = pd.read_csv(extra_train_data_file, low_memory=False)

# Stack the two sets row-wise (axis=0 is pd.concat's default) and preview the result.
all_train_data = pd.concat([train_data, extra_train_data])
all_train_data.head()

Unnamed: 0,p_num,time,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,bg-5:20,...,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00,bg+1:00
p01_0,p01,06:10:00,,,9.6,,,9.7,,,...,,,,,,,,,,13.4
p01_1,p01,06:25:00,,,9.7,,,9.2,,,...,,,,,,,,,,12.8
p01_2,p01,06:40:00,,,9.2,,,8.7,,,...,,,,,,,,,,15.5
p01_3,p01,06:55:00,,,8.7,,,8.4,,,...,,,,,,,,,,14.8
p01_4,p01,07:10:00,,,8.4,,,8.1,,,...,,,,,,,,,,12.7


## Create patient-specific models

In [None]:
import numpy as np
import joblib
from pipelines import pipeline
from src.features.tuners import BaggingHyperparameterTuner

# Tune, report and persist one bagging model per patient.
#
# dropna(): unique() would keep a NaN patient id, and because NaN == NaN is
# False the filter below would then select an empty frame and hand it to the
# pipeline. Rows without a patient id cannot belong to a patient-specific model.
patient_ids = all_train_data['p_num'].dropna().unique()
for patient_id in patient_ids:
    print(f'-----------{patient_id}-----------')

    # Keep only this patient's rows; the id column is constant within the
    # subset, so it is removed before preprocessing.
    data = all_train_data[all_train_data['p_num'] == patient_id]
    data = data.drop(columns=['p_num'])

    # fit_transform is called afresh on each patient's rows.
    train_data_patient = pipeline.fit_transform(data)
    X = train_data_patient.drop(columns=['bg+1:00'])
    # The target is trained on the log1p scale.
    # NOTE(review): the RMSE printed below and the saved model's predictions
    # are presumably on that scale too (np.expm1 to invert) — confirm against
    # the tuner and the inference code.
    y = np.log1p(train_data_patient['bg+1:00'])

    tuner = BaggingHyperparameterTuner()
    tuner.fit(X=X, y=y)

    print('Best hyperparameters found.')
    print(f'RMSE: {tuner.get_rmse()}')
    tuner.show_chart()

    # Instances do not expose the class-level __name__ by default, so reading
    # tuner.__name__ raises AttributeError (after the expensive tuning has
    # already run) unless the tuner defines that attribute itself. Prefer the
    # tuner's own __name__ when present, otherwise fall back to its class name.
    tuner_name = getattr(tuner, '__name__', type(tuner).__name__)
    joblib.dump(tuner.get_best_model(), f'{tuner_name}.{patient_id}.model.pkl')