In [71]:
import tensorflow as tf
from time import time
import numpy
import os
import json
import pickle
import pandas
import datetime
from functools import partial, reduce
import importlib

import sys
sys.path.append('../libs')

import data_pipeline
import conv_model
import initialize
import prepare_data
import flacdb
import plot_batch
import loss_metrics

In [2]:
# Shell escape: print the GPU model, driver/CUDA versions and current GPU memory use.
! nvidia-smi

Mon Nov 18 07:36:34 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.78       Driver Version: 410.78       CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX TIT...  On   | 00000000:89:00.0 Off |                  N/A |
| 22%   37C    P8    18W / 250W |      0MiB / 12212MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [3]:
%%time

H = initialize.load_hypes()
initial_data_path = '/scr1/mimic/initial_data/'

# sig_data, metadata = initialize.load_initial_data(save_path=initial_data_path)
sig_data, metadata = initialize.load_initial_data(load_path=initial_data_path)
diagnosis = initialize.load_diagnosis(H['icd_codes'], metadata)
diagnosis = initialize.augment_diagnosis(diagnosis, metadata)
diagnosis = initialize.fix_diagnosis(diagnosis)
diagnosis_priors = (diagnosis == 1).sum() / (diagnosis != 0).sum()
diagnosis_priors['measured_systemic_hypertension'] = 0.5
diagnosis_priors['measured_pulmonary_hypertension'] = 0.5
diagnosis = initialize.conform_diagnosis(diagnosis, metadata)
partition = initialize.load_partition(H, sig_data)

CPU times: user 3.33 s, sys: 332 ms, total: 3.66 s
Wall time: 3.67 s


In [4]:
%%time

I = partition['validation']
row_lengths = initialize.get_row_lengths(metadata[I])
args = [metadata[I], sig_data[I], diagnosis[I], row_lengths]
tensors = initialize.get_tensors(H, *args)
dataset = data_pipeline.build(H, tensors, 'train')

CPU times: user 3.93 s, sys: 612 ms, total: 4.54 s
Wall time: 4.52 s


In [5]:
%%time

# Build the model via conv_model.build from the hyper-parameters H; the
# per-diagnosis priors computed above are handed to the builder as well.
model = conv_model.build(H, diagnosis_priors)

CPU times: user 26.1 s, sys: 624 ms, total: 26.7 s
Wall time: 26.4 s


In [6]:
# Shell escape: list the saved checkpoint files; the prefix before ".index"/".data-*" is the model id.
! ls /scr1/checkpoints/

1117408_20191117-112416.data-00000-of-00002
1117408_20191117-112416.data-00001-of-00002
1117408_20191117-112416.index
1118450_20191117-135257.data-00000-of-00002
1118450_20191117-135257.data-00001-of-00002
1118450_20191117-135257.index
checkpoint


In [7]:
# Restore the weights saved under the chosen model id into the model built above.
model_id = '1118450_20191117-135257'
checkpoint_path = os.path.join('/scr1/checkpoints/', model_id)
model.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f40a94aae50>

In [14]:
%%time

inputs, labels, predictions = [], [], []
for x, y in dataset.take(2**15 // H['batch_size']):
    inputs.append(x)
    labels.append(y)
    x_ = {**x, 'mask': tf.cast(x['mask'], 'float')}
    predictions.append(model.predict(x_))

In [62]:
%%time

keys = enumerate(['pressure', 'diagnosis'])
P = {k: numpy.concatenate([p[i] for p in predictions]) for i, k in keys}
X = {k: numpy.concatenate([x[k] for x in inputs]) for k in inputs[0].keys()}
Y = {k: numpy.concatenate([y[k] for y in labels]) for k in labels[0].keys()}

CPU times: user 68 ms, sys: 200 ms, total: 268 ms
Wall time: 265 ms


In [124]:
from matplotlib import pyplot


def get_code_name(code):
    """Return a human-readable title for a diagnosis code.

    The code is looked up in `loss_metrics.CODE_NAMES`; when it is not present
    the code itself is used. Underscores are then replaced by spaces and the
    result is title-cased.
    """
    known_names = loss_metrics.CODE_NAMES
    raw_name = known_names[code] if code in known_names else code
    return raw_name.replace('_', ' ').title()


def plot_diagnosis_distributions(p_neg, p_pos, threshold, sensitivity, code):
    """Plot the predicted-probability histograms for one diagnosis code.

    Draws, on a black background, a green histogram of `p_neg` and a
    half-transparent red histogram of `p_pos` (100 bins each), a dashed white
    vertical line at `threshold`, a legend, and a title reporting
    `sensitivity` as a percentage.

    Args:
        p_neg: predicted probabilities of the negatively-labelled examples.
        p_pos: predicted probabilities of the positively-labelled examples.
        threshold: x position of the threshold marker; a scalar tensor or any
            value accepted by `float()`.
        sensitivity: fraction of positive cases detected; a scalar tensor or
            any value accepted by `float()`.
        code: diagnosis code; used as the figure identifier and, through
            `get_code_name`, in the title.
    """
    # Fetch (or create) the figure registered under this code and wipe it, so
    # re-running the cell redraws in place instead of stacking artists.
    fig = pyplot.figure(code, facecolor='black', figsize=[7, 4])
    fig.clear()
    ax = fig.gca()
    ax.set_facecolor('black')

    _, _, neg_bars = ax.hist(p_neg, bins=100, alpha=1, color='green')
    _, _, pos_bars = ax.hist(p_pos, bins=100, alpha=.5, color='red')

    # axvline spans the full axes height whatever the bin counts are, and does
    # not take part in y autoscaling (a line drawn from 0 to a fixed 100 is
    # too short for tall histograms and stretches the axis for short ones).
    threshold_line = ax.axvline(float(threshold), linestyle='--', color='w')

    ax.set_xlabel('Probability', color='white')
    ax.tick_params(axis='x', colors='white')

    # Explicit handles keep each label attached to the right artist instead of
    # relying on the order in which matplotlib collects them.
    legend = fig.legend(
        [threshold_line, neg_bars, pos_bars],
        ['Threshold', 'Negative', 'Positive'],
        facecolor='black',
    )
    for text in legend.get_texts():
        text.set_color('white')

    # float() accepts scalar tensors as well as numpy/Python numbers.
    pct = round(float(sensitivity) * 100, 2)
    title = '{}% of {} cases detected'.format(pct, get_code_name(code))
    ax.set_title(title, color='white')

In [120]:
# Per-diagnosis threshold and sensitivity, one entry per column of the labels.
# Both lists are created here: `thresholds` was previously appended to without
# being initialised in this notebook, which fails on a fresh kernel and, on a
# re-run, leaves stale values at the positions later indexed by column.
thresholds, sensitivities = [], []
for i in range(Y['diagnosis'].shape[1]):
    y_true = Y['diagnosis'][:, i]
    y_pred = P['diagnosis'][:, i]
    thresholds.append(loss_metrics._precise_threshold(y_true, y_pred))
    sensitivities.append(loss_metrics._precise_sensitivity(y_true, y_pred))

In [125]:
%matplotlib widget

for i in tf.argsort(sensitivities, direction='DESCENDING')[:5]:
    plot_diagnosis_distributions(
        P['diagnosis'][Y['diagnosis'][:, i] == -1, i], 
        P['diagnosis'][Y['diagnosis'][:, i] ==  1, i], 
        thresholds[i], 
        sensitivities[i], 
        diagnosis.columns[i]
    )

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …