In [1]:
import tensorflow as tf
import numpy
import os
import json, pickle
import pandas
from functools import partial, reduce
import importlib

import sys
sys.path.append('../libs')

import flacdb
import prepare_data
import initialize
import data_pipeline
import conv_model
import plot_batch

In [2]:
# Shell escape: long-format listing, with human-readable sizes, of /scr1/checkpoints.
! ls -lh /scr1/checkpoints

total 11G
-rw-r--r-- 1 kuprel users  57K Nov 18 08:52 1117408_20191117-112416.data-00000-of-00002
-rw-r--r-- 1 kuprel users  40M Nov 18 08:52 1117408_20191117-112416.data-00001-of-00002
-rw-r--r-- 1 kuprel users  27K Nov 18 08:52 1117408_20191117-112416.index
-rw-r--r-- 1 kuprel users  57K Nov 18 03:07 1118450_20191117-135257.data-00000-of-00002
-rw-r--r-- 1 kuprel users  40M Nov 18 03:07 1118450_20191117-135257.data-00001-of-00002
-rw-r--r-- 1 kuprel users  27K Nov 18 03:07 1118450_20191117-135257.index
-rw-r--r-- 1 kuprel users  57K Nov 18 12:22 1124961_20191118-10101100100.ckpt.data-00000-of-00002
-rw-r--r-- 1 kuprel users  40M Nov 18 12:22 1124961_20191118-10101100100.ckpt.data-00001-of-00002
-rw-r--r-- 1 kuprel users  27K Nov 18 12:22 1124961_20191118-10101100100.ckpt.index
-rw-r--r-- 1 kuprel users  57K Nov 18 14:30 1124961_20191118-10101100200.ckpt.data-00000-of-00002
-rw-r--r-- 1 kuprel users  40M Nov 18 14:30 1124961_20191118-10101100200.ckpt.data-00001-of-00002
-rw-r--r-- 1 k

In [4]:
%%time

H = initialize.load_hypes()
initial_data_path = '/scr1/mimic/initial_data/'

# sig_data, metadata = initialize.load_initial_data(save_path=initial_data_path)
sig_data, metadata = initialize.load_initial_data(load_path=initial_data_path)
diagnosis = initialize.load_diagnosis(H['icd_codes'], metadata)
diagnosis = initialize.augment_diagnosis(diagnosis, metadata)
diagnosis = initialize.fix_diagnosis(diagnosis)
diagnosis_priors = (diagnosis == 1).sum() / (diagnosis != 0).sum()
diagnosis_priors['measured_systemic_hypertension'] = 0.5
diagnosis_priors['measured_pulmonary_hypertension'] = 0.5
diagnosis = initialize.conform_diagnosis(diagnosis, metadata)
partition = initialize.load_partition(H['input_sigs_validation'], sig_data)

CPU times: user 2.94 s, sys: 156 ms, total: 3.09 s
Wall time: 3.1 s


In [5]:
%%time
(diagnosis[partition['validation']] == 1).any(level=0, axis=0).sum()

CPU times: user 64 ms, sys: 0 ns, total: 64 ms
Wall time: 65.4 ms


4019                   236
41071                   42
41401                  119
4240                    28
4241                    27
42731                  152
42732                   16
4275                    25
4280                   143
42833                   16
78552                   65
99592                   92
gender_F               228
gender_M               371
race_asian              10
race_black              51
race_hispanic           29
race_white             382
age_at_least_75        156
height_at_least_70     115
weight_at_least_100    108
died                    97
dtype: int64

In [5]:
%%time

dataset = {}
for part in ['train', 'validation']:
    I = partition[part]
    row_lengths = initialize.get_row_lengths(metadata[I])
    args = [metadata[I], sig_data[I], diagnosis[I], row_lengths]
    tensors = initialize.get_tensors(H, *args)
    dataset[part] = data_pipeline.build(H, tensors, part)

CPU times: user 18.2 s, sys: 200 ms, total: 18.4 s
Wall time: 18.4 s


In [7]:
# Re-execute the already-imported conv_model module so that edits made to its
# source file are picked up without restarting the kernel.
importlib.reload(conv_model)

<module 'conv_model' from '../libs/conv_model.py'>

In [8]:
# Construct the model from the hyperparameters and the per-column priors.
model = conv_model.build(H, diagnosis_priors)

# One pass of 128 (= 2**7) training steps, followed by a single validation step.
model.fit(
    dataset['train'],
    validation_data=dataset['validation'],
    steps_per_epoch=128,
    validation_steps=1,
)

Train for 128 steps, validate for 1 steps
 19/128 [===>..........................] - ETA: 16:26 - loss: 1.3292 - pressure_loss: 1.6601 - diagnosis_loss: 1.1104 - pressure_ABP_systolic: 21.4015 - pressure_ABP_diastolic: 11.9643 - pressure_ABP_pulse: 18.4847 - pressure_CVP_systolic: 6.4757 - pressure_CVP_diastolic: 5.5955 - pressure_CVP_pulse: 5.4249 - pressure_ICP_systolic: 5.1387 - pressure_ICP_diastolic: 4.5058 - pressure_ICP_pulse: 3.4367 - pressure_PAP_systolic: 10.4759 - pressure_PAP_diastolic: 8.0560 - pressure_PAP_pulse: 8.5764 - diagnosis_25000_diabetes_sensitivity: 0.5435 - diagnosis_25000_diabetes_specificity: 0.4972 - diagnosis_25000_diabetes_accuracy: 0.5203 - diagnosis_25000_diabetes_precise_sensitivity: 0.0981 - diagnosis_25000_diabetes_precise_threshold: 0.9851 - diagnosis_2720_hypercholesterolemia_sensitivity: 0.2426 - diagnosis_2720_hypercholesterolemia_specificity: 0.7832 - diagnosis_2720_hypercholesterolemia_accuracy: 0.5129 - diagnosis_2720_hypercholesterolemia_preci

KeyboardInterrupt: 

In [6]:
# Load the signal table straight from its HDF file.
# NOTE(review): this rebinds `sig_data`, which an earlier cell assigned from
# initialize.load_initial_data(); any cell run after this one sees the HDF
# contents instead. Confirm the two sources are interchangeable.
sig_data = pandas.read_hdf('/scr-ssd/mimic/sig_data.hdf')

In [7]:
# Number of rows per distinct value of the 'sig_name' column
# (value_counts orders the result from most to least frequent).
sig_data['sig_name'].value_counts()

II        1871134
PLETH     1579432
RESP      1434547
V         1302166
AVR        914718
ABP        611101
III        310765
CVP        247842
I          173964
MCL        126479
ICP         67941
PAP         53724
MCL1        43520
ART         37237
AVF         33734
AVL         21747
UAP          6313
PLETHR       2866
AOBP         2079
PLETHL       2051
UVP          1220
IC2           712
RAP           526
CO2           443
ECG           428
V1            343
IC1           305
P1            223
LAP           102
BAP            66
V2             45
P4             11
FAP             9
V5              6
AO              5
V3              3
P3              3
P2              2
Name: sig_name, dtype: int64