# Description

Make predictions based on the train data and the extended train data.

## Feature engineering

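* bg, insulin, cals, hr and steps are used as features
* carbs and activity are not used
* time is reduced to the hour and treated as a categorical feature
* p_num is treated as a categorical feature (only p_nums that occur in the test data)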

In [1]:
import os
import pandas as pd
from pipelines import standardization_pipeline, preprocessing_pipeline

# All input files live under the shared data directory, four levels up.
data_dir = os.path.join('..', '..', '..', '..', 'data')
train_data_file = os.path.join(data_dir, 'raw', 'train.csv')
additional_data_file = os.path.join(data_dir, 'interim', 'all_test_5h.csv')
validation_data_file = os.path.join(data_dir, 'interim', 'all_test_3h.csv')

# Only the raw training file is read with its first column as the index.
# NOTE(review): the interim files are read without index_col -- confirm they
# carry no id column, otherwise the concatenated frame mixes index styles.
train_data = pd.read_csv(train_data_file, index_col=0, low_memory=False)
additional_data = pd.read_csv(additional_data_file, low_memory=False)
validation_data = pd.read_csv(validation_data_file, low_memory=False)

# Extend the training set: stack the additional rows below the raw ones.
train_data = pd.concat([train_data, additional_data], axis=0)

# Train only on patients that actually have to be predicted, i.e. those
# whose p_num occurs in the validation data.
unique_patients = validation_data['p_num'].unique()
is_target_patient = train_data['p_num'].isin(unique_patients)
train_data = train_data[is_target_patient]
train_data.head()

Unnamed: 0,p_num,time,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,bg-5:20,...,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00,bg+1:00
p01_0,p01,06:10:00,,,9.6,,,9.7,,,...,,,,,,,,,,13.4
p01_1,p01,06:25:00,,,9.7,,,9.2,,,...,,,,,,,,,,12.8
p01_2,p01,06:40:00,,,9.2,,,8.7,,,...,,,,,,,,,,15.5
p01_3,p01,06:55:00,,,8.7,,,8.4,,,...,,,,,,,,,,14.8
p01_4,p01,07:10:00,,,8.4,,,8.1,,,...,,,,,,,,,,12.7


## Preprocessing

In [2]:
# Run the project's preprocessing pipeline (imported from `pipelines`).
# fit_transform is called on the training data only; the validation data is
# passed through transform afterwards, so order matters here.
# NOTE(review): presumably an sklearn-style pipeline whose fitted state is
# reused by transform -- confirm in pipelines.py.
train_data = preprocessing_pipeline.fit_transform(train_data)
validation_data = preprocessing_pipeline.transform(validation_data)

In [3]:
# Preview the first rows of the preprocessed training data.
train_data.head()

Unnamed: 0,p_num,bg-3:00,bg-2:55,bg-2:50,bg-2:45,bg-2:40,bg-2:35,bg-2:30,bg-2:25,bg-2:20,...,cals-0:35,cals-0:30,cals-0:25,cals-0:20,cals-0:15,cals-0:10,cals-0:05,cals-0:00,bg+1:00,hour
p01_0,p01,13.9,14.0,14.1,14.2,14.2,14.2,14.2,14.6,15.0,...,6.28,6.28,6.28,6.28,6.28,6.28,6.28,6.28,13.4,6
p01_1,p01,14.2,14.2,14.2,14.2,14.6,15.0,15.4,16.0,16.6,...,6.28,6.28,6.28,6.28,6.28,6.28,6.28,6.28,12.8,6
p01_2,p01,14.2,14.6,15.0,15.4,16.0,16.6,17.2,17.533333,17.866667,...,6.28,6.28,6.28,6.28,6.28,6.28,6.28,6.28,15.5,6
p01_3,p01,15.4,16.0,16.6,17.2,17.533333,17.866667,18.2,18.266667,18.333333,...,6.28,6.28,6.28,6.28,6.28,6.28,6.28,6.28,14.8,6
p01_4,p01,17.2,17.533333,17.866667,18.2,18.266667,18.333333,18.4,18.266667,18.133333,...,6.28,6.28,6.28,6.28,6.28,6.28,6.28,6.28,12.7,7


In [4]:
# Preview the first rows of the preprocessed validation data.
validation_data.head()

Unnamed: 0,p_num,bg-3:00,bg-2:55,bg-2:50,bg-2:45,bg-2:40,bg-2:35,bg-2:30,bg-2:25,bg-2:20,...,cals-0:35,cals-0:30,cals-0:25,cals-0:20,cals-0:15,cals-0:10,cals-0:05,cals-0:00,bg+1:00,hour
0,p01,9.2,9.2,9.53,9.87,10.2,10.23,10.27,10.3,10.27,...,4.8,7.89,7.98,4.8,4.8,4.8,4.8,4.8,13.73,3
1,p01,9.2,9.53,9.87,10.2,10.23,10.27,10.3,10.27,10.23,...,7.89,7.98,4.8,4.8,4.8,4.8,4.8,4.9,13.7,3
2,p01,9.53,9.87,10.2,10.23,10.27,10.3,10.27,10.23,10.2,...,7.98,4.8,4.8,4.8,4.8,4.8,4.9,4.8,13.77,4
3,p01,9.87,10.2,10.23,10.27,10.3,10.27,10.23,10.2,10.7,...,4.8,4.8,4.8,4.8,4.8,4.9,4.8,4.8,13.83,4
4,p01,10.2,10.23,10.27,10.3,10.27,10.23,10.2,10.7,11.2,...,4.8,4.8,4.8,4.8,4.9,4.8,4.8,4.8,13.9,4


## Standardization Pipeline

In [5]:
# Run the project's standardization pipeline (imported from `pipelines`).
# As with preprocessing, fit_transform is called on the training data and the
# validation data only goes through transform.
# NOTE(review): the outputs shown below contain p_num_* indicator columns, so
# this step presumably also one-hot encodes p_num -- confirm in pipelines.py.
train_data = standardization_pipeline.fit_transform(train_data)
validation_data = standardization_pipeline.transform(validation_data)

In [6]:
# Preview the first rows of the training data after standardization.
train_data.head()

Unnamed: 0,bg-3:00,bg-2:55,bg-2:50,bg-2:45,bg-2:40,bg-2:35,bg-2:30,bg-2:25,bg-2:20,bg-2:15,...,p_num_p10,p_num_p11,p_num_p12,p_num_p15,p_num_p16,p_num_p18,p_num_p19,p_num_p21,p_num_p22,p_num_p24
p01_0,1.86622,1.901417,1.933736,1.965406,1.967451,1.966604,1.96478,2.10042,2.232985,2.364281,...,0,0,0,0,0,0,0,0,0,0
p01_1,1.966246,1.968171,1.967097,1.965406,2.100931,2.233456,2.364692,2.567505,2.766582,2.964031,...,0,0,0,0,0,0,0,0,0,0
p01_2,1.966246,2.101679,2.233985,2.365412,2.568111,2.767159,2.964561,3.079075,3.189013,3.297225,...,0,0,0,0,0,0,0,0,0,0
p01_3,2.366354,2.568958,2.767761,2.965421,3.079784,3.189674,3.297822,3.323738,3.344645,3.363864,...,0,0,0,0,0,0,0,0,0,0
p01_4,2.966514,3.080739,3.190334,3.298759,3.324497,3.345337,3.364474,3.323738,3.277945,3.230586,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# Preview the first rows of the validation data after standardization.
validation_data.head()

Unnamed: 0,bg-3:00,bg-2:55,bg-2:50,bg-2:45,bg-2:40,bg-2:35,bg-2:30,bg-2:25,bg-2:20,bg-2:15,...,p_num_p10,p_num_p11,p_num_p12,p_num_p15,p_num_p16,p_num_p18,p_num_p19,p_num_p21,p_num_p22,p_num_p24
0,0.299134,0.299319,0.409137,0.522052,0.632651,0.642354,0.655065,0.665801,0.655539,0.641668,...,0,0,0,0,0,0,0,0,0,0
1,0.299134,0.409463,0.522564,0.632054,0.642662,0.655696,0.665063,0.655792,0.6422,0.631672,...,0,0,0,0,0,0,0,0,0,0
2,0.409163,0.522945,0.632656,0.642054,0.65601,0.665703,0.655065,0.642446,0.632195,0.798269,...,0,0,0,0,0,0,0,0,0,0
3,0.522527,0.63309,0.642664,0.655387,0.666021,0.655696,0.641735,0.632437,0.798944,0.964866,...,0,0,0,0,0,0,0,0,0,0
4,0.632556,0.643103,0.656008,0.665387,0.65601,0.642354,0.631737,0.799254,0.965693,1.131463,...,0,0,0,0,0,0,0,0,0,0
