In [15]:
import pickle
from random import shuffle, seed
import numpy as np
from util import plot_confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve
from keras.models import Sequential, Model, load_model
from keras.layers.convolutional import Conv1D
from keras.layers import Dense, Dropout, Flatten, Input, MaxPooling1D, Embedding, Lambda
from keras.layers.merge import Concatenate
from keras.preprocessing import sequence
from keras.callbacks import ModelCheckpoint
from keras.layers.normalization import BatchNormalization
from keras.layers import Activation
from keras.optimizers import Adam
from keras.layers.embeddings import Embedding
import tensorflow as tf

import warnings
warnings.filterwarnings('ignore') # get rid of silly sklearn warnings

seed("sahai uwu")

In [16]:
# NOTE(review): machine-specific absolute path; consider moving it to a
# configurable data directory so the notebook runs on other machines.
ppath = "/Volumes/scottd/mimic_processed/preproced.bin"

# Tuples of (text, label) where label is 0 or 1.
# pickle can execute arbitrary code while loading, so only point this at a
# file you produced yourself. The `with` block closes the handle promptly
# instead of leaving it open until garbage collection.
with open(ppath, 'rb') as pickle_file:
    data = pickle.load(pickle_file)
shuffle(data)  # in-place; order is determined by the `random` seed set earlier

split_point = int(0.8 * len(data)) # Use 80% of our data for training and the rest for testing
train = data[:split_point]
test = data[split_point:]

# Separate the (text, label) pairs into parallel lists.
train_data = [t[0] for t in train]
train_labels = [t[1] for t in train]
test_data = [t[0] for t in test]
test_labels = [t[1] for t in test]

print("Number of training samples: " + str(len(train)))
print("Number of test samples: " + str(len(test)))
# Class balance of each split: (unique labels, count per label).
print(np.unique(train_labels, return_counts=True))
print(np.unique(test_labels, return_counts=True))

# NOTE(review): len(d) is the length of whatever each sample is; if samples
# are space-joined strings this is a character count, not a token count --
# confirm before using it to choose a sequence length.
lens = sorted(len(d) for d in train_data)
print(train_data[3])
# The index int(len(lens) * .8) selects the 80th percentile of the sorted
# lengths, not the median, so the label says so.
print("80th percentile length: " + str(lens[int(len(lens) * .8)]))

Number of training samples: 5660
Number of test samples: 1415
(array([0, 1]), array([3365, 2295]))
(array([0, 1]), array([841, 574]))
unit_admission date discharge date date_birth sex_f service neonatology history present_illness infant born_weeks gestation_yearold g2_p0 mother prenatal_screens follows blood_type positive_antibody negative_hbsag negative_rpr nonreactive_rubella immune_gbs negative mother history bipolar_disorder treated lithium pregnancy_uncomplicated day prior_delivery seen obs_office noted elevated blood_pressures admitted hospital care observation upon monitoring baby nonreassuring_fetal heart_rate tracing mother delivered_cesarean section spontaneous_rupture membranes delivery mother temperature receive_intrapartum antibiotics unknown reason nicu called delivery infant emerged_grimace transferred warmer dried_stimulated given bulb_suctioning infant required facial cpap apgars one_minute five_minutes respectively transferred newborn_nursery noted dusky poor muscle_t

In [17]:
# --- Network hyperparameters ---
embedding_dim = 100                  # size of the last axis of the model input
num_convs = [64, 128, 256, 512]      # filter count for each convolutional stage
hidden_dims = [1024, 512, 256]       # widths of the dense layers in the head
drops = [0.3, 0.4, 0.5]              # dropout rate paired with each dense layer
sz = 3                               # kernel size shared by every Conv1D layer
top_k = 8                            # k used by the `_top_k` pooling helper

# --- Training parameters ---
batch_size = 128
num_epochs = 5

# --- Preprocessing parameters ---
sequence_length = 210                # number of time steps per input sample

# The network consumes already-embedded sequences: (time steps, embedding dim).
input_shape = (sequence_length, embedding_dim)
model_input = Input(shape=input_shape)


def _conv_bn_relu(tensor, n_filters):
    """Apply Conv1D -> BatchNormalization -> ReLU to `tensor` and return the result."""
    tensor = Conv1D(filters=n_filters, kernel_size=sz, strides=1, padding="same")(tensor)
    tensor = BatchNormalization()(tensor)
    return Activation('relu')(tensor)


# Stem: one plain convolution with no normalisation or activation after it.
z = Conv1D(filters=64, kernel_size=sz, strides=1, padding="same")(model_input)

# Each stage stacks two conv/BN/ReLU units at the same width and then
# downsamples along the time axis with a stride-2 max-pool.
for stage_filters in num_convs:
    z = _conv_bn_relu(z, stage_filters)
    z = _conv_bn_relu(z, stage_filters)
    z = MaxPooling1D(pool_size=3, strides=2, padding="same")(z)

def _top_k(x):
    """Keep the `top_k` largest activations of each channel and flatten them.

    Swaps the last two axes of `x`, takes the `top_k` largest values along
    the new last axis, and reshapes the result to
    (-1, num_convs[-1] * top_k).

    Reads the module-level `top_k` and `num_convs`.
    Assumes `x` is (batch, steps, channels) with channels == num_convs[-1]
    and steps >= top_k -- TODO confirm against the layer it is applied to.
    """
    channels_first = tf.transpose(x, perm=[0, 2, 1])
    strongest = tf.nn.top_k(channels_first, k=top_k).values
    flat_width = num_convs[-1] * top_k
    return tf.reshape(strongest, (-1, flat_width))

# Alternative head, currently disabled: pool with `_top_k` instead of flattening.
#z = Lambda(_top_k, output_shape=(num_convs[-1] * top_k,))(z)
z = Flatten()(z)

# Fully connected stack: the i-th Dense layer is followed by the i-th dropout rate.
for layer_idx in range(len(hidden_dims)):
    z = Dense(hidden_dims[layer_idx], activation="relu")(z)
    z = Dropout(drops[layer_idx])(z)

# Single sigmoid unit for the binary label.
model_output = Dense(1, activation="sigmoid")(z)

model = Model(model_input, model_output)
model.summary()

# Adam with explicitly chosen betas; the learning rate is left at the Keras default.
opt = Adam(beta_1=0.70, beta_2=0.99)
model.compile(
    loss="binary_crossentropy",
    optimizer=opt,
    metrics=["accuracy"],
)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 210, 100)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 210, 64)           19264     
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 210, 64)           12352     
_________________________________________________________________
batch_normalization_1 (Batch (None, 210, 64)           256       
_________________________________________________________________
activation_1 (Activation)    (None, 210, 64)           0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 210, 64)           12352     
_________________________________________________________________
bat