# Testing

In [47]:
%load_ext watermark
%watermark -v -n -m -p numpy,scipy,sklearn,pandas

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Mon Oct 22 2018 

CPython 3.6.4
IPython 7.0.1

numpy 1.15.2
scipy 1.1.0
sklearn 0.20.0
pandas 0.23.4

compiler   : GCC 7.2.0
system     : Linux
release    : 4.15.0-36-generic
machine    : x86_64
processor  : x86_64
CPU cores  : 12
interpreter: 64bit


In [48]:
# Magic commands must be in separate cells 
# to properly display light background for 
# plots with JupyterLab dark theme 

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [49]:
import numpy as np
import pandas as pd
import sys
import os

import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as mtick

import seaborn as sns
plt.style.use('ggplot')
sns.set()
# plt.style.use('seaborn')
mpl.style.use('seaborn')

pd.options.mode.chained_assignment = None  # default='warn'
pd.options.display.max_columns = 999
pd.options.display.max_rows = 40
np.set_printoptions(precision=6)

print(pd.__version__)

# UGLY HACK - not for production
sys.path.append('..')

0.23.4


## Params

In [50]:
import utils

PARAM_FILE = '../experiments/00_baseline.json'

params = utils.Params()
params.load(PARAM_FILE)

print(params)

{'dataset': 'baseline', 'train_bs': 256, 'test_bs': 256, 'optimizer': {'type': 'Adam', 'weight_decay': 1e-09}}


## Features

In [51]:
DATA_DIR = '../data/processed'

FEAT_FILE = os.path.join(DATA_DIR, params['dataset'], 'features.json')
features = utils.Features()
features.load(FEAT_FILE)

print(features)

{'continuous': ['time_in_hospital', 'num_lab_procedures', 'num_procedures', 'num_medications', 'number_outpatient', 'number_emergency', 'number_inpatient', 'number_diagnoses'], 'categorical': ['race', 'gender', 'age', 'weight', 'admission_type_id', 'discharge_disposition_id', 'admission_source_id', 'payer_code', 'medical_specialty', 'diag_1', 'diag_2', 'diag_3', 'max_glu_serum', 'A1Cresult', 'metformin', 'glipizide', 'glyburide', 'pioglitazone', 'rosiglitazone', 'insulin', 'change', 'diabetesMed'], 'output': ['readmitted'], 'embedding_sizes': [[6, 3], [3, 2], [10, 5], [10, 5], [8, 4], [23, 12], [13, 7], [17, 9], [63, 32], [536, 50], [539, 50], [592, 50], [4, 2], [4, 2], [4, 2], [4, 2], [4, 2], [4, 2], [4, 2], [4, 2], [2, 1], [2, 1]]}


## Dataset

In [52]:
TRAIN = os.path.join(DATA_DIR, params['dataset'], 'train.csv')
train_df = pd.read_csv(TRAIN)
display(train_df.head(10))

Unnamed: 0,readmitted,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,number_diagnoses,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,payer_code,medical_specialty,diag_1,diag_2,diag_3,max_glu_serum,A1Cresult,metformin,glipizide,glyburide,pioglitazone,rosiglitazone,insulin,change,diabetesMed
0,1,4,32,0,17,0,0,0,9,2,0,9,9,2,0,0,6,62,470,210,444,2,2,1,1,1,1,1,2,1,1
1,1,10,53,1,21,2,0,0,9,2,0,8,9,0,10,10,6,62,520,102,572,2,2,1,1,1,2,1,0,0,1
2,1,2,66,6,24,0,0,0,7,0,1,4,9,0,0,10,5,62,197,199,57,2,2,1,2,1,2,1,2,0,1
3,1,1,4,0,8,0,0,0,9,2,0,6,9,0,0,10,6,62,67,209,57,2,2,1,1,1,1,1,1,1,0
4,1,5,38,0,16,0,0,1,9,0,0,5,9,0,19,10,16,62,426,75,121,2,2,1,1,1,1,1,0,0,1
5,0,1,35,0,10,0,1,0,6,2,1,6,9,1,18,0,6,43,132,210,143,2,2,2,1,1,2,1,1,0,1
6,0,1,17,4,9,0,0,1,5,2,0,6,9,2,0,0,16,62,200,198,57,2,2,1,1,2,1,1,1,1,1
7,1,7,32,1,13,0,0,0,7,5,0,8,9,2,18,0,16,54,226,364,229,2,2,1,1,1,1,1,1,1,0
8,1,3,62,0,7,0,0,0,3,0,0,5,9,0,0,10,5,62,305,57,190,2,2,1,1,1,1,1,3,0,1
9,1,10,55,1,27,0,0,7,9,0,1,4,9,0,8,10,16,10,212,108,69,2,2,1,1,1,1,1,2,1,1


In [53]:
import model.data_loader as data_loader

train_set = data_loader.DiabetesDataset(train_df,
                                        continuous_features=features['continuous'],
                                        categorical_features=features['categorical'],
                                        output_features=features['output'])

print(train_set.n)
print(train_set[np.arange(10)])

18171
[array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.]], dtype=float32), array([[ 4., 32.,  0., 17.,  0.,  0.,  0.,  9.],
       [10., 53.,  1., 21.,  2.,  0.,  0.,  9.],
       [ 2., 66.,  6., 24.,  0.,  0.,  0.,  7.],
       [ 1.,  4.,  0.,  8.,  0.,  0.,  0.,  9.],
       [ 5., 38.,  0., 16.,  0.,  0.,  1.,  9.],
       [ 1., 35.,  0., 10.,  0.,  1.,  0.,  6.],
       [ 1., 17.,  4.,  9.,  0.,  0.,  1.,  5.],
       [ 7., 32.,  1., 13.,  0.,  0.,  0.,  7.],
       [ 3., 62.,  0.,  7.,  0.,  0.,  0.,  3.],
       [10., 55.,  1., 27.,  0.,  0.,  7.,  9.]], dtype=float32), array([[  2,   0,   9,   9,   2,   0,   0,   6,  62, 470, 210, 444,   2,
          2,   1,   1,   1,   1,   1,   2,   1,   1],
       [  2,   0,   8,   9,   0,  10,  10,   6,  62, 520, 102, 572,   2,
          2,   1,   1,   1,   2,   1,   0,   0,   1],
       [  0,   1,   4,   9,   0,   0,  10,   5,  62, 197, 199,  57,   2,
          2,

## Dataloader

In [54]:
import model.data_loader as data_loader

dataloaders = data_loader.fetch_dataloaders(DATA_DIR, features, params)

print(len(dataloaders['train']))
print(len(dataloaders['test']))

for i, (target, x_cont, x_cat) in enumerate(dataloaders['train']):
    print(i, x_cont)

71
18
0 tensor([[ 6., 36.,  1.,  ...,  0.,  0.,  9.],
        [ 2., 56.,  0.,  ...,  1.,  1.,  7.],
        [ 4., 17.,  1.,  ...,  1.,  0.,  9.],
        ...,
        [ 3., 51.,  0.,  ...,  0.,  0.,  9.],
        [ 7., 50.,  1.,  ...,  0.,  1.,  7.],
        [11., 31.,  3.,  ...,  0.,  0.,  8.]])
1 tensor([[ 6.,  3.,  3.,  ...,  0.,  0.,  4.],
        [ 4., 47.,  0.,  ...,  0.,  1.,  7.],
        [ 9., 79.,  6.,  ...,  0.,  0.,  4.],
        ...,
        [ 8., 60.,  1.,  ...,  0.,  9.,  5.],
        [ 7., 60.,  0.,  ...,  1.,  0.,  8.],
        [ 3., 58.,  4.,  ...,  1.,  0.,  9.]])
2 tensor([[14., 71.,  4.,  ...,  0.,  1.,  9.],
        [ 2., 61.,  0.,  ...,  0.,  0.,  6.],
        [ 2., 36.,  6.,  ...,  0.,  0.,  9.],
        ...,
        [ 1., 49.,  3.,  ...,  0.,  0.,  4.],
        [ 6., 70.,  0.,  ...,  0.,  0.,  6.],
        [ 4., 22.,  6.,  ...,  0.,  0.,  9.]])
3 tensor([[ 2., 13.,  1.,  ...,  0.,  2.,  9.],
        [ 6., 20.,  1.,  ...,  0.,  0.,  5.],
        [ 2., 45.,  0., 