# Description

Make predictions based on train and extended train data

## Feature engineering

* Features used: bg, insulin, cals, hr, steps
* carbs and activity are not used
* time is reduced to the hour and treated as categorical
* p_num is treated as categorical (only the p_nums that occur in the test data)

In [12]:
import os
import pandas as pd
from pipelines import standardization_pipeline, preprocessing_pipeline

# Every input file lives below the same relative data directory.
data_dir = os.path.join('..', '..', '..', '..', 'data')

# Base training set.
train_data_file = os.path.join(data_dir, 'raw', 'train.csv')
train_data = pd.read_csv(train_data_file, index_col=0, low_memory=False)

# Validation set.
validation_data_file = os.path.join(data_dir, 'interim', 'all_test_4_55h.csv')
validation_data = pd.read_csv(validation_data_file, index_col=0, low_memory=False)

# Extra rows used to extend the training set.
additional_train_data_file = os.path.join(data_dir, 'interim', 'all_test_3h.csv')
additional_train_data = pd.read_csv(additional_train_data_file, index_col=0, low_memory=False)

# Drop every additional row whose index label also occurs in the validation
# index (presumably so that validation rows are never trained on).
# NOTE(review): the match is made on the DataFrame index (first CSV column),
# not on an `id` column -- confirm the index of both interim files holds the
# sample ids.
validation_labels = validation_data.index.unique()
in_validation = additional_train_data.index.isin(validation_labels)
additional_train_data = additional_train_data.loc[~in_validation]

# Append the remaining additional rows below the base training rows.
train_data = pd.concat([train_data, additional_train_data])

# Only patients that appear in the test set have to be predicted, so restrict
# all frames to those patients.
test_data_file = os.path.join(data_dir, 'raw', 'test.csv')
test_data = pd.read_csv(test_data_file, index_col=0, low_memory=False)

unique_patients = test_data['p_num'].unique()

train_mask = train_data['p_num'].isin(unique_patients)
validation_mask = validation_data['p_num'].isin(unique_patients)
# unique_patients was derived from test_data itself, so this filter is not
# expected to drop rows; it is kept for symmetry with the two filters above.
test_mask = test_data['p_num'].isin(unique_patients)

train_data = train_data.loc[train_mask]
validation_data = validation_data.loc[validation_mask]
test_data = test_data.loc[test_mask]

train_data.head()

Unnamed: 0_level_0,p_num,time,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,bg-5:20,...,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00,bg+1:00
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
p01_0,p01,06:10:00,,,9.6,,,9.7,,,...,,,,,,,,,,13.4
p01_1,p01,06:25:00,,,9.7,,,9.2,,,...,,,,,,,,,,12.8
p01_2,p01,06:40:00,,,9.2,,,8.7,,,...,,,,,,,,,,15.5
p01_3,p01,06:55:00,,,8.7,,,8.4,,,...,,,,,,,,,,14.8
p01_4,p01,07:10:00,,,8.4,,,8.1,,,...,,,,,,,,,,12.7


In [10]:
# Preview the first rows of the loaded validation data.
# NOTE(review): in the output shown for this cell the index is an integer and
# `id` is an ordinary column, unlike train_data -- confirm this is intended.
validation_data.head()

Unnamed: 0,id,p_num,time,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,...,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00,bg+1:00
0,p01_test_59,p01,05:45:00,,,,,,,,...,,,,,,,,,,9.6
1,p01_test_403,p01,10:25:00,,,,,,,,...,,,,,,,,,,4.6
2,p01_test_731,p01,13:45:00,,,,,,,,...,,,,,,,,,,8.0
3,p01_test_1184,p01,03:30:00,,,,,,,,...,,,,,,,,,,9.9
4,p01_test_1758,p01,03:20:00,,,,,,,,...,,,,,,,,,,5.3


## Preprocessing

In [3]:
# Fit the preprocessing pipeline on the training data only, then apply the
# already-fitted pipeline to the validation data. Both names are rebound to
# the transformed results.
train_data = preprocessing_pipeline.fit_transform(train_data)
validation_data = preprocessing_pipeline.transform(validation_data)

In [4]:
# Preview the first rows of the preprocessed training data.
train_data.head()

Unnamed: 0,p_num,bg-3:00,bg-2:55,bg-2:50,bg-2:45,bg-2:40,bg-2:35,bg-2:30,bg-2:25,bg-2:20,...,cals-0:35,cals-0:30,cals-0:25,cals-0:20,cals-0:15,cals-0:10,cals-0:05,cals-0:00,bg+1:00,hour
p01_0,p01,13.9,14.0,14.1,14.2,14.2,14.2,14.2,14.6,15.0,...,6.12,6.12,6.12,6.12,6.12,6.12,6.12,6.12,13.4,6
p01_1,p01,14.2,14.2,14.2,14.2,14.6,15.0,15.4,16.0,16.6,...,6.12,6.12,6.12,6.12,6.12,6.12,6.12,6.12,12.8,6
p01_2,p01,14.2,14.6,15.0,15.4,16.0,16.6,17.2,17.533333,17.866667,...,6.12,6.12,6.12,6.12,6.12,6.12,6.12,6.12,15.5,6
p01_3,p01,15.4,16.0,16.6,17.2,17.533333,17.866667,18.2,18.266667,18.333333,...,6.12,6.12,6.12,6.12,6.12,6.12,6.12,6.12,14.8,6
p01_4,p01,17.2,17.533333,17.866667,18.2,18.266667,18.333333,18.4,18.266667,18.133333,...,6.12,6.12,6.12,6.12,6.12,6.12,6.12,6.12,12.7,7


In [5]:
# Preview the first rows of the preprocessed validation data.
validation_data.head()

Unnamed: 0,p_num,bg-3:00,bg-2:55,bg-2:50,bg-2:45,bg-2:40,bg-2:35,bg-2:30,bg-2:25,bg-2:20,...,cals-0:35,cals-0:30,cals-0:25,cals-0:20,cals-0:15,cals-0:10,cals-0:05,cals-0:00,bg+1:00,hour
0,p01,9.2,9.2,9.53,9.87,10.2,10.23,10.27,10.3,10.27,...,4.8,7.89,7.98,4.8,4.8,4.8,4.8,4.8,13.73,3
1,p01,9.2,9.53,9.87,10.2,10.23,10.27,10.3,10.27,10.23,...,7.89,7.98,4.8,4.8,4.8,4.8,4.8,4.9,13.7,3
2,p01,9.53,9.87,10.2,10.23,10.27,10.3,10.27,10.23,10.2,...,7.98,4.8,4.8,4.8,4.8,4.8,4.9,4.8,13.77,4
3,p01,9.87,10.2,10.23,10.27,10.3,10.27,10.23,10.2,10.7,...,4.8,4.8,4.8,4.8,4.8,4.9,4.8,4.8,13.83,4
4,p01,10.2,10.23,10.27,10.3,10.27,10.23,10.2,10.7,11.2,...,4.8,4.8,4.8,4.8,4.9,4.8,4.8,4.8,13.9,4


## Standardization Pipeline

In [6]:
# Fit the standardization pipeline on the training data only, then apply the
# already-fitted pipeline to the validation data. Both names are rebound to
# the transformed results.
train_data = standardization_pipeline.fit_transform(train_data)
validation_data = standardization_pipeline.transform(validation_data)

In [7]:
# Preview the first rows of the standardized training data.
train_data.head()

Unnamed: 0,bg-3:00,bg-2:55,bg-2:50,bg-2:45,bg-2:40,bg-2:35,bg-2:30,bg-2:25,bg-2:20,bg-2:15,...,p_num_p10,p_num_p11,p_num_p12,p_num_p15,p_num_p16,p_num_p18,p_num_p19,p_num_p21,p_num_p22,p_num_p24
p01_0,1.769108,1.803275,1.835647,1.867437,1.869058,1.868705,1.867557,1.998148,2.126792,2.25442,...,0,0,0,0,0,0,0,0,0,0
p01_1,1.866067,1.867977,1.867997,1.867437,1.998525,2.127587,2.25564,2.451189,2.64436,2.836235,...,0,0,0,0,0,0,0,0,0,0
p01_2,1.866067,1.997381,2.126796,2.255508,2.451657,2.645351,2.837764,2.947376,3.054101,3.159466,...,0,0,0,0,0,0,0,0,0,0
p01_3,2.253905,2.450294,2.644395,2.837614,2.947946,3.055247,3.161166,3.184683,3.205058,3.224112,...,0,0,0,0,0,0,0,0,0,0
p01_4,2.835661,2.946342,3.054161,3.161007,3.185301,3.206262,3.225847,3.184683,3.140362,3.094819,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# Preview the first rows of the standardized validation data.
validation_data.head()

Unnamed: 0,bg-3:00,bg-2:55,bg-2:50,bg-2:45,bg-2:40,bg-2:35,bg-2:30,bg-2:25,bg-2:20,bg-2:15,...,p_num_p10,p_num_p11,p_num_p12,p_num_p15,p_num_p16,p_num_p18,p_num_p19,p_num_p21,p_num_p22,p_num_p24
0,0.250077,0.250429,0.357255,0.467147,0.574392,0.584004,0.596586,0.606667,0.596733,0.583319,...,0,0,0,0,0,0,0,0,0,0
1,0.250077,0.357187,0.467245,0.573866,0.584102,0.596948,0.606288,0.596959,0.583793,0.573622,...,0,0,0,0,0,0,0,0,0,0
2,0.356732,0.46718,0.574,0.583568,0.597049,0.606656,0.596586,0.584015,0.574089,0.735237,...,0,0,0,0,0,0,0,0,0,0
3,0.46662,0.573938,0.583705,0.596504,0.606759,0.596948,0.58365,0.574307,0.735829,0.896852,...,0,0,0,0,0,0,0,0,0,0
4,0.573275,0.583644,0.596645,0.606205,0.597049,0.584004,0.573948,0.736107,0.897569,1.058467,...,0,0,0,0,0,0,0,0,0,0
