In [1]:
import os
import csv
import pickle
import random
import math
import dicom
import numpy as np
import scipy
from tqdm import tqdm
from tinydb import TinyDB, Query
from natsort import natsorted
from skimage import transform
from sklearn.externals import joblib

import matplotlib.pyplot as plt
%matplotlib inline
from PIL import Image

#### load and shuffle data

In [2]:
(pts_train, pts_train_val, 
 data_train, data_diffs_train, label_sys_train, label_dia_train, 
 data_train_val, data_diffs_train_val, label_sys_train_val, label_dia_train_val, 
 data_val, data_diffs_val, data_val_pt_index) = joblib.load('../data_proc/0-data_processed.pkl')

In [3]:
shuffle_index = list(range(data_train.shape[0]))
random.shuffle(shuffle_index)
data_train = data_train[shuffle_index]
label_sys_train = label_sys_train[shuffle_index]
label_dia_train = label_dia_train[shuffle_index]

shuffle_index = list(range(data_train_val.shape[0]))
random.shuffle(shuffle_index)
data_train_val = data_train_val[shuffle_index]
label_sys_train_val = label_sys_train_val[shuffle_index]
label_dia_train_val = label_dia_train_val[shuffle_index]

#### helper functions

In [4]:
def check_invalid_cdf(preds):
    problematic_i = []
    for i in range(preds.shape[0]):
        if np.min(preds[i,:]) < 0 or np.max(preds[i,:]) > 1:
            problematic_i.append(i)
            continue
        for j in range(preds.shape[1]-1):
            if preds[i,j] > preds[i,j+1]:
                problematic_i.append(i)
    return problematic_i

#### training

In [5]:
from keras.models import Sequential, Graph
from keras.layers.core import Activation, Dense, Dropout, Flatten, Merge, Reshape, Lambda
from keras.layers.core import TimeDistributedDense, TimeDistributedMerge
from keras.layers.recurrent import LSTM, GRU
from keras.layers.convolutional import Convolution2D, MaxPooling2D, AveragePooling2D, UpSampling2D, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU, PReLU, ParametricSoftplus, ELU
from keras.layers.normalization import BatchNormalization
from keras.layers.noise import GaussianDropout, GaussianNoise
from keras.utils import np_utils, generic_utils
from keras.callbacks import ModelCheckpoint, EarlyStopping

Using Theano backend.


Using gpu device 3: Tesla K80 (CNMeM is disabled)


Continuous Ranked Probability Score (used as loss function)

In [6]:
from keras import backend as K
from theano import tensor as T

# currently for theano, would need modifications for tensorflow

def CRPS(y_true, y_pred):
    return K.mean(K.square(T.cumsum(y_pred, axis=-1) - y_true), axis=-1)

In [7]:
model_sys = Sequential()
model_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th', input_shape=(30, 196, 196)))
model_sys.add(BatchNormalization())
model_sys.add(LeakyReLU(alpha=0.3))
model_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_sys.add(BatchNormalization())
model_sys.add(LeakyReLU(alpha=0.3))
model_sys.add(MaxPooling2D(pool_size=(2,2), strides=None, border_mode='valid', dim_ordering='th'))
model_sys.add(Dropout(0.2))
model_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_sys.add(BatchNormalization())
model_sys.add(LeakyReLU(alpha=0.3))
model_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_sys.add(BatchNormalization())
model_sys.add(LeakyReLU(alpha=0.3))
model_sys.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_sys.add(Dropout(0.3))
model_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_sys.add(BatchNormalization())
model_sys.add(LeakyReLU(alpha=0.3))
model_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_sys.add(BatchNormalization())
model_sys.add(LeakyReLU(alpha=0.3))
model_sys.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_sys.add(Dropout(0.4))
model_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_sys.add(BatchNormalization())
model_sys.add(LeakyReLU(alpha=0.3))
model_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_sys.add(BatchNormalization())
model_sys.add(LeakyReLU(alpha=0.3))
model_sys.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_sys.add(Dropout(0.5))
model_sys.add(Flatten())
model_sys.add(Dense(2048, activation='relu'))
model_sys.add(Dropout(0.5))
model_sys.add(Dense(600))
model_sys.add(Activation('softmax'))

model_sys.compile(loss=CRPS, optimizer='adam')

In [9]:
batch_size = 32
nb_epoch = 100
    
checkpointer = ModelCheckpoint(filepath='../model_weights/0-0-convnet_basic_systole.hdf5', verbose=1, save_best_only=True)
earlystopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1)

model_sys.fit(data_train, label_sys_train, 
              batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=2,
              validation_data=(data_train_val, label_sys_train_val), shuffle=True,
              callbacks=[checkpointer, earlystopping])

Train on 5065 samples, validate on 266 samples
Epoch 1/100
106s - loss: 0.0466 - val_loss: 0.0325
Epoch 00000: val_loss improved from inf to 0.03247, saving model to ../model_weights/0-convnet_basic_systole.hdf5
Epoch 2/100
105s - loss: 0.0348 - val_loss: 0.0304
Epoch 00001: val_loss improved from 0.03247 to 0.03040, saving model to ../model_weights/0-convnet_basic_systole.hdf5
Epoch 3/100
106s - loss: 0.0326 - val_loss: 0.0361
Epoch 00002: val_loss did not improve
Epoch 4/100
105s - loss: 0.0306 - val_loss: 0.0322
Epoch 00003: val_loss did not improve
Epoch 5/100
105s - loss: 0.0291 - val_loss: 0.0300
Epoch 00004: val_loss improved from 0.03040 to 0.02997, saving model to ../model_weights/0-convnet_basic_systole.hdf5
Epoch 6/100
105s - loss: 0.0272 - val_loss: 0.0390
Epoch 00005: val_loss did not improve
Epoch 7/100
105s - loss: 0.0266 - val_loss: 0.0324
Epoch 00006: val_loss did not improve
Epoch 8/100
105s - loss: 0.0247 - val_loss: 0.0325
Epoch 00007: val_loss did not improve
Epoch

<keras.callbacks.History at 0x7fbba562b668>

In [8]:
model_dia = Sequential()
model_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th', input_shape=(30, 196, 196)))
model_dia.add(BatchNormalization())
model_dia.add(LeakyReLU(alpha=0.3))
model_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_dia.add(BatchNormalization())
model_dia.add(LeakyReLU(alpha=0.3))
model_dia.add(MaxPooling2D(pool_size=(2,2), strides=None, border_mode='valid', dim_ordering='th'))
model_dia.add(Dropout(0.2))
model_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_dia.add(BatchNormalization())
model_dia.add(LeakyReLU(alpha=0.3))
model_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_dia.add(BatchNormalization())
model_dia.add(LeakyReLU(alpha=0.3))
model_dia.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_dia.add(Dropout(0.3))
model_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_dia.add(BatchNormalization())
model_dia.add(LeakyReLU(alpha=0.3))
model_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_dia.add(BatchNormalization())
model_dia.add(LeakyReLU(alpha=0.3))
model_dia.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_dia.add(Dropout(0.4))
model_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_dia.add(BatchNormalization())
model_dia.add(LeakyReLU(alpha=0.3))
model_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_dia.add(BatchNormalization())
model_dia.add(LeakyReLU(alpha=0.3))
model_dia.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_dia.add(Dropout(0.5))
model_dia.add(Flatten())
model_dia.add(Dense(2048, activation='relu'))
model_dia.add(Dropout(0.5))
model_dia.add(Dense(600))
model_dia.add(Activation('softmax'))

model_dia.compile(loss=CRPS, optimizer='adam')

In [11]:
batch_size = 32
nb_epoch = 100
    
checkpointer = ModelCheckpoint(filepath='../model_weights/0-0-convnet_basic_diastole.hdf5', verbose=1, save_best_only=True)
earlystopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1)

model_dia.fit(data_train, label_dia_train, 
              batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=2,
              validation_data=(data_train_val, label_dia_train_val), shuffle=True,
              callbacks=[checkpointer, earlystopping])

Train on 5065 samples, validate on 266 samples
Epoch 1/100
105s - loss: 0.0590 - val_loss: 0.0452
Epoch 00000: val_loss improved from inf to 0.04515, saving model to ../model_weights/0-convnet_basic_diastole.hdf5
Epoch 2/100
105s - loss: 0.0502 - val_loss: 0.0453
Epoch 00001: val_loss did not improve
Epoch 3/100
105s - loss: 0.0461 - val_loss: 0.0666
Epoch 00002: val_loss did not improve
Epoch 4/100
105s - loss: 0.0434 - val_loss: 0.0463
Epoch 00003: val_loss did not improve
Epoch 5/100
105s - loss: 0.0407 - val_loss: 0.0402
Epoch 00004: val_loss improved from 0.04515 to 0.04021, saving model to ../model_weights/0-convnet_basic_diastole.hdf5
Epoch 6/100
105s - loss: 0.0386 - val_loss: 0.0363
Epoch 00005: val_loss improved from 0.04021 to 0.03633, saving model to ../model_weights/0-convnet_basic_diastole.hdf5
Epoch 7/100
105s - loss: 0.0375 - val_loss: 0.0432
Epoch 00006: val_loss did not improve
Epoch 8/100
105s - loss: 0.0361 - val_loss: 0.0452
Epoch 00007: val_loss did not improve
Ep

<keras.callbacks.History at 0x7fbb38011ef0>

In [12]:
model_diff_sys = Sequential()
model_diff_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th', input_shape=(29, 196, 196)))
model_diff_sys.add(BatchNormalization())
model_diff_sys.add(LeakyReLU(alpha=0.3))
model_diff_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_sys.add(BatchNormalization())
model_diff_sys.add(LeakyReLU(alpha=0.3))
model_diff_sys.add(MaxPooling2D(pool_size=(2,2), strides=None, border_mode='valid', dim_ordering='th'))
model_diff_sys.add(Dropout(0.2))
model_diff_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_sys.add(BatchNormalization())
model_diff_sys.add(LeakyReLU(alpha=0.3))
model_diff_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_sys.add(BatchNormalization())
model_diff_sys.add(LeakyReLU(alpha=0.3))
model_diff_sys.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_diff_sys.add(Dropout(0.3))
model_diff_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_sys.add(BatchNormalization())
model_diff_sys.add(LeakyReLU(alpha=0.3))
model_diff_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_sys.add(BatchNormalization())
model_diff_sys.add(LeakyReLU(alpha=0.3))
model_diff_sys.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_diff_sys.add(Dropout(0.4))
model_diff_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_sys.add(BatchNormalization())
model_diff_sys.add(LeakyReLU(alpha=0.3))
model_diff_sys.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_sys.add(BatchNormalization())
model_diff_sys.add(LeakyReLU(alpha=0.3))
model_diff_sys.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_diff_sys.add(Dropout(0.5))
model_diff_sys.add(Flatten())
model_diff_sys.add(Dense(2048, activation='relu'))
model_diff_sys.add(Dropout(0.5))
model_diff_sys.add(Dense(600))
model_diff_sys.add(Activation('softmax'))

model_diff_sys.compile(loss=CRPS, optimizer='adam')

In [13]:
batch_size = 32
nb_epoch = 100
    
checkpointer = ModelCheckpoint(filepath='../model_weights/0-0-convnet_diffs_systole.hdf5', verbose=1, save_best_only=True)
earlystopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1)

model_diff_sys.fit(data_diffs_train, label_sys_train, 
                   batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=2,
                   validation_data=(data_diffs_train_val, label_sys_train_val), shuffle=True,
                   callbacks=[checkpointer, earlystopping])

Train on 5065 samples, validate on 266 samples
Epoch 1/100
108s - loss: 0.0466 - val_loss: 0.0325
Epoch 00000: val_loss improved from inf to 0.03253, saving model to ../model_weights/0-convnet_diffs_systole.hdf5
Epoch 2/100
105s - loss: 0.0367 - val_loss: 0.0329
Epoch 00001: val_loss did not improve
Epoch 3/100
105s - loss: 0.0360 - val_loss: 0.0346
Epoch 00002: val_loss did not improve
Epoch 4/100
105s - loss: 0.0337 - val_loss: 0.0417
Epoch 00003: val_loss did not improve
Epoch 5/100
105s - loss: 0.0282 - val_loss: 0.0472
Epoch 00004: val_loss did not improve
Epoch 00004: early stopping


<keras.callbacks.History at 0x7fbba562b160>

In [14]:
model_diff_dia = Sequential()
model_diff_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th', input_shape=(29, 196, 196)))
model_diff_dia.add(BatchNormalization())
model_diff_dia.add(LeakyReLU(alpha=0.3))
model_diff_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_dia.add(BatchNormalization())
model_diff_dia.add(LeakyReLU(alpha=0.3))
model_diff_dia.add(MaxPooling2D(pool_size=(2,2), strides=None, border_mode='valid', dim_ordering='th'))
model_diff_dia.add(Dropout(0.2))
model_diff_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_dia.add(BatchNormalization())
model_diff_dia.add(LeakyReLU(alpha=0.3))
model_diff_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_dia.add(BatchNormalization())
model_diff_dia.add(LeakyReLU(alpha=0.3))
model_diff_dia.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_diff_dia.add(Dropout(0.3))
model_diff_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_dia.add(BatchNormalization())
model_diff_dia.add(LeakyReLU(alpha=0.3))
model_diff_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_dia.add(BatchNormalization())
model_diff_dia.add(LeakyReLU(alpha=0.3))
model_diff_dia.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_diff_dia.add(Dropout(0.4))
model_diff_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_dia.add(BatchNormalization())
model_diff_dia.add(LeakyReLU(alpha=0.3))
model_diff_dia.add(Convolution2D(32, 3, 3, init='he_uniform', border_mode='same', dim_ordering='th'))
model_diff_dia.add(BatchNormalization())
model_diff_dia.add(LeakyReLU(alpha=0.3))
model_diff_dia.add(MaxPooling2D(pool_size=(2, 2), strides=None, border_mode='valid', dim_ordering='th'))
model_diff_dia.add(Dropout(0.5))
model_diff_dia.add(Flatten())
model_diff_dia.add(Dense(2048, activation='relu'))
model_diff_dia.add(Dropout(0.5))
model_diff_dia.add(Dense(600))
model_diff_dia.add(Activation('softmax'))

model_diff_dia.compile(loss=CRPS, optimizer='adam')

In [15]:
batch_size = 32
nb_epoch = 100
    
checkpointer = ModelCheckpoint(filepath='../model_weights/0-0-convnet_diffs_diastole.hdf5', verbose=1, save_best_only=True)
earlystopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1)

model_diff_dia.fit(data_diffs_train, label_dia_train, 
                   batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=2,
                   validation_data=(data_diffs_train_val, label_dia_train_val), shuffle=True,
                   callbacks=[checkpointer, earlystopping])

Train on 5065 samples, validate on 266 samples
Epoch 1/100
105s - loss: 0.0603 - val_loss: 0.0507
Epoch 00000: val_loss improved from inf to 0.05065, saving model to ../model_weights/0-convnet_diffs_diastole.hdf5
Epoch 2/100
105s - loss: 0.0554 - val_loss: 0.0506
Epoch 00001: val_loss improved from 0.05065 to 0.05060, saving model to ../model_weights/0-convnet_diffs_diastole.hdf5
Epoch 3/100
105s - loss: 0.0549 - val_loss: 0.0502
Epoch 00002: val_loss improved from 0.05060 to 0.05021, saving model to ../model_weights/0-convnet_diffs_diastole.hdf5
Epoch 4/100
105s - loss: 0.0540 - val_loss: 0.0507
Epoch 00003: val_loss did not improve
Epoch 5/100
105s - loss: 0.0496 - val_loss: 0.0495
Epoch 00004: val_loss improved from 0.05021 to 0.04952, saving model to ../model_weights/0-convnet_diffs_diastole.hdf5
Epoch 6/100
105s - loss: 0.0418 - val_loss: 0.0572
Epoch 00005: val_loss did not improve
Epoch 7/100
105s - loss: 0.0361 - val_loss: 0.0541
Epoch 00006: val_loss did not improve
Epoch 8/10

<keras.callbacks.History at 0x7fbb24ae9208>

#### predict

In [10]:
model_sys.load_weights('../model_weights/0-0-convnet_basic_systole.hdf5')
model_dia.load_weights('../model_weights/0-0-convnet_basic_diastole.hdf5')

preds_sys = model_sys.predict(data_val, verbose=0)
preds_dia = model_dia.predict(data_val, verbose=0)

preds_sys = np.clip(np.cumsum(preds_sys, axis=-1), 0, 1)
preds_dia = np.clip(np.cumsum(preds_dia, axis=-1), 0, 1)

In [11]:
preds_sys_pt = {}
preds_dia_pt = {}
for pt in range(501, 701):
    preds_sys_pt[pt] = np.mean(preds_sys[np.where(data_val_pt_index == pt)[0]], axis=0)
    preds_dia_pt[pt] = np.mean(preds_dia[np.where(data_val_pt_index == pt)[0]], axis=0)

In [12]:
with open('../data/sample_submission_validate.csv', 'r') as fi:
    reader = csv.reader(fi)
    header = next(reader)
    
    with open('../submissions/0-0-convnet_basic.csv', 'w') as fo:
        writer = csv.writer(fo, lineterminator='\n')
        writer.writerow(header)
        for rowin in tqdm(reader):
            _id = rowin[0]
            pt, mode = _id.split('_')
            rowout = [_id]
            if mode.lower() == 'systole':
                rowout.extend(preds_sys_pt[int(pt)].tolist())
            elif mode.lower() == 'diastole':
                rowout.extend(preds_dia_pt[int(pt)].tolist())
            else:
                raise
            writer.writerow(rowout)

