In [1]:
%matplotlib widget

import tensorflow as tf
from matplotlib import pyplot
import matplotlib
import matplotlib.cm as colormap
import numpy
import os
import json, pickle
import pandas
from functools import partial, reduce
import importlib
from sklearn import manifold
from scipy import stats

import sys
sys.path.append('../libs')

import flacdb
import prepare_data
import initialize
import data_pipeline
import conv_model
import plot_batch
import generate_report_kfold
import icd_util

# Select matplotlib's dark theme for the session. Note that a later cell
# calls pyplot.style.use('default'), which replaces this choice before any
# figure in this notebook is drawn.
pyplot.style.use('dark_background')

In [2]:
# Load predictions for fold_index 0 (model 1469795, checkpoint index 2).
# Only Y0 and P0 are specific to this fold; H, metadata and priors are
# reassigned by the fold-1 and fold-2 cells that follow.
fold0_request = dict(
    model_id=1469795,
    fold_index=0,
    checkpoint_index=2,
    example_count_log2=14,
)
H, _, Y0, P0, metadata, priors = generate_report_kfold.generate_predictions(**fold0_request)
# The second return value is not used in this notebook; drop the reference.
_ = None

found hypes ../hypes/1469795_20200512-210303.json 
found weights /scr1/checkpoints/1469795_20200512-210303_00384.ckpt
loading predictions


In [3]:
# Load predictions for fold_index 1 (model 1469816, checkpoint index 2).
# H, metadata and priors are overwritten here and again by the fold-2 cell.
fold1_request = dict(
    model_id=1469816,
    fold_index=1,
    checkpoint_index=2,
    example_count_log2=14,
)
H, _, Y1, P1, metadata, priors = generate_report_kfold.generate_predictions(**fold1_request)
# The second return value is not used in this notebook; drop the reference.
_ = None

found hypes ../hypes/1469816_20200512-213718.json 
found weights /scr1/checkpoints/1469816_20200512-213718_00384.ckpt
loading predictions


In [4]:
# Load predictions for fold_index 2 (model 1470209, checkpoint index 2).
# This is the last of the three loads, so the H, metadata and priors bound
# here are the ones the rest of the notebook sees.
fold2_request = dict(
    model_id=1470209,
    fold_index=2,
    checkpoint_index=2,
    example_count_log2=14,
)
H, _, Y2, P2, metadata, priors = generate_report_kfold.generate_predictions(**fold2_request)
# The second return value is not used in this notebook; drop the reference.
_ = None

found hypes ../hypes/1470209_20200513-050523.json 
found weights /scr1/checkpoints/1470209_20200513-050523_00384.ckpt
loading predictions


In [5]:
# Pool the three folds row-wise. P0..P2 are treated as probabilities and
# mapped to log-odds, log(p / (1 - p)); Z holds the pooled log-odds.
probabilities = numpy.vstack([P0, P1, P2])
Z = numpy.log(probabilities / (1 - probabilities))
# Inputs are not pooled: X0..X2 are never bound in this notebook.
# X = {k: numpy.concatenate([X0[k], X1[k], X2[k]], axis=0) for k in X0}
# Concatenate every label array along the example axis, keyed as in Y0.
Y = {
    key: numpy.concatenate([fold[key] for fold in (Y0, Y1, Y2)], axis=0)
    for key in Y0
}
y = Y['diagnosis']

In [6]:
# Lookup used by get_name(): it is queried with `code in group_names` and
# `group_names[code]`, and the looked-up value is concatenated to a string.
# NOTE(review): presumably a dict of ICD group code -> description; confirm
# against icd_util.load_group_strings.
group_names = icd_util.load_group_strings()

def get_name(code):
    """Return a display name for a label code.

    A code present in ``group_names`` is rendered as
    ``"<code>: <description>"`` with every ``/`` turned into ``_`` and the
    characters ``' , ( ) [ ]`` stripped out. Any other code is shown with
    underscores replaced by spaces and title-cased.
    """
    if code in group_names:
        label = code + ': ' + group_names[code]
        # One translation table: '/' -> '_', punctuation -> deleted.
        cleanup = str.maketrans({'/': '_', **dict.fromkeys("',()[]")})
        return label.translate(cleanup)
    return code.replace('_', ' ').title()

# One whitespace-trimmed display name per entry of priors.index, in order.
names = [label.strip() for label in map(get_name, priors.index)]

In [7]:
def gaussian(diff, sig):
    """Zero-mean normal density with standard deviation ``sig`` at ``diff``.

    Evaluates ``exp(-diff**2 / (2 * sig**2)) / (sig * sqrt(2 * pi))``.
    ``diff`` may be a scalar or an array; the result broadcasts like
    ``diff`` against ``sig``.
    """
    normalizer = sig * numpy.sqrt(2 * numpy.pi)
    exponent = diff**2 / (-2 * sig**2)
    return numpy.exp(exponent) / normalizer

# Self-check for gaussian(): the zero-mean density evaluated at `mu` must
# equal scipy's N(mu, sig) density evaluated at 0.
# `stats` is already imported in the first cell, so it is not re-imported.
# A privately seeded generator makes the check reproducible and leaves the
# global numpy RNG state untouched; the 0.1 lower bound keeps `sig` away
# from 0, where the density is undefined.
check_rng = numpy.random.RandomState(0)
mu, sig = check_rng.uniform(0.1, 5.0, size=2)
p1 = stats.norm(mu, sig).pdf(0)
p2 = gaussian(mu, sig)
assert numpy.isclose(p1, p2), (mu, sig, p1, p2)

In [16]:
%%time

# Kernel density estimate of the log-odds scores Z, one curve per label
# column, followed by a per-grid-point posterior and log2 risk ratio.
KDE_BANDWIDTH = 0.4    # standard deviation of the Gaussian kernel, in Z units
KDE_GRID_SIZE = 1000   # evaluation points per label

# Evaluation grid: for every label, KDE_GRID_SIZE evenly spaced points
# between the 0.1th and 99.9th percentile of that label's scores.
# domain has shape (labels, KDE_GRID_SIZE).
low, high = numpy.percentile(Z, [0.1, 99.9], axis=0)
domain = numpy.linspace(low, high, KDE_GRID_SIZE, axis=1, dtype='float32')

# y is compared against +1 (positive) and -1 (negative); entries equal to 0
# fall in neither class and are also excluded from the prior below.
is_pos = y == 1
is_neg = y == -1

# Build the kernel matrix one label at a time. Each iteration only holds an
# (examples, grid) array, instead of one (examples, labels, grid) array plus
# same-sized temporaries for the masked sums. The per-label sums are the
# same; the full kernel tensor is simply never kept.
density_rows, pos_rows, neg_rows = [], [], []
for label_index in range(Z.shape[1]):
    kernel = gaussian(
        Z[:, label_index, numpy.newaxis] - domain[label_index],
        KDE_BANDWIDTH,
    )
    density_rows.append(kernel.sum(axis=0))
    pos_rows.append(kernel[is_pos[:, label_index]].sum(axis=0))
    neg_rows.append(kernel[is_neg[:, label_index]].sum(axis=0))
del kernel

# All three arrays have shape (labels, KDE_GRID_SIZE).
density = numpy.stack(density_rows)
density_pos = numpy.stack(pos_rows)
density_neg = numpy.stack(neg_rows)

# Normalize each label's curve so that sum(density) * grid spacing == 1.
sums = density.sum(axis=1) * (domain[:, 1] - domain[:, 0])
density /= numpy.expand_dims(sums, axis=-1)

# Per-label base rate: positives among the examples with a non-zero label.
prior = is_pos.sum(axis=0) / (y != 0).sum(axis=0)
# NOTE(review): the +1 is added to the positive mass only, so the posterior
# tends to 1 (not to the prior) wherever both class densities are small.
# Confirm this smoothing is intended; it is kept unchanged here.
posterior = (density_pos + 1) / (density_neg + density_pos + 1)
# log2 ratio of posterior to prior: +1 means twice the base rate, -1 half.
risk = numpy.log2(posterior / numpy.expand_dims(prior, axis=-1))

CPU times: user 1min 13s, sys: 15.6 s, total: 1min 29s
Wall time: 1min 29s


In [11]:
# Replace the 'dark_background' style selected in the first cell with
# matplotlib's default style for the figures below.
pyplot.style.use('default')

In [17]:
# Index of the first label whose display name mentions "cardiogenic".
j = next(index for index, name in enumerate(names) if 'cardiogenic' in name.lower())
print(j, names[j])

# Close any existing figure 0 and create it afresh.
pyplot.close(0)
fig, ax = pyplot.subplots(num=0)
# ax.plot(domain[j], density[j], 'w');
# Red: kernel mass of positive examples; green: kernel mass of negatives.
for curve, color in ((density_pos, 'r'), (density_neg, 'g')):
    ax.plot(domain[j], curve[j], color)
ax.set_xlabel('Prediction')
ax.set_ylabel('Count')
ax.set_title(names[j])

75 785.51: Cardiogenic Shock


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, '785.51: Cardiogenic Shock')

In [61]:
# Shape check on the pooled label matrix; the saved output shows
# (43557, 90). Rows follow the concatenated folds; columns are indexed the
# same way as `names` in the cells above.
y.shape

(43557, 90)

In [69]:
# Risk curve, log2(posterior / prior), for the label index j chosen in the
# density-plot cell.
pyplot.close(1)
fig, ax = pyplot.subplots(num=1)
# A white line ('w') cannot be seen on the white axes of the 'default'
# style selected earlier, so use a color that is visible on both light and
# dark backgrounds.
ax.plot(domain[j], risk[j], color='tab:blue')
ax.set_xlabel('Prediction')
ax.set_ylabel('log2(posterior / prior)')
ax.set_title(names[j]);

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [316]:
# For every label and every threshold t in 1..5, find the highest grid index
# at which risk < -t. The grid axis is reversed so that argmax, which
# returns the first True, picks the last crossing in the original order.
# is_less has shape (labels, grid, 5).
is_less = numpy.expand_dims(risk[:, ::-1], -1) < -numpy.arange(1, 6)
# argmax yields 0 both when the first reversed entry is True and when no
# entry is True, so "no crossing" is decided with an explicit any() rather
# than by comparing the argmax result to 0.
is_invalid = ~is_less.any(axis=1)
# Convert positions on the reversed axis back to indices on the original grid.
low_risk_indices = risk.shape[1] - 1 - numpy.argmax(is_less, axis=1)
# -1 marks (label, threshold) pairs with no crossing at all.
low_risk_indices[is_invalid] = -1

In [317]:
# Only grid points above each label's 10th-percentile score are eligible.
# (This rebinds `low`, which the density cell used for the 0.1th percentile.)
low = numpy.percentile(Z, 10, axis=0)
is_high_enough = domain > low[:, numpy.newaxis]
# First eligible grid index at which risk exceeds 1, then 2; one column per
# threshold, giving shape (labels, 2).
first_hits = [
    numpy.argmax((risk > thresh) & is_high_enough, axis=1)
    for thresh in (1, 2)
]
high_risk_indices = numpy.stack(first_hits, axis=1)
# An argmax of 0 is recorded as -1 ("not found"). Grid point 0 sits at the
# 0.1th percentile, which is never above the 10th percentile, so index 0 is
# never an eligible hit.
high_risk_indices[high_risk_indices == 0] = -1

In [340]:
n, m = risk.shape
# Probability mass per grid cell: normalized density times the grid spacing
# of each label. Shape (labels, grid).
pmass = density * (domain[:, 1:2] - domain[:, 0:1])
grid_index = numpy.arange(m)

# High-risk mass: for each threshold, sum the cells strictly above the
# threshold's grid index. A missing index (-1) is mapped to m so that no
# cell is selected. numpy.where returns a new array, so the -1 sentinels
# stored in high_risk_indices stay intact; assigning through the
# expand_dims view would overwrite them.
J = numpy.expand_dims(high_risk_indices, -1)
J = numpy.where(J < 0, m, J)
# Broadcasting (m,) against (labels, thresholds, 1) gives the
# (labels, thresholds, grid) mask directly.
mask = grid_index > J
pmass_high_risk = numpy.expand_dims(pmass, axis=1)
pmass_high_risk = (pmass_high_risk * mask).sum(axis=-1)

# Low-risk mass: sum the cells strictly below each threshold's grid index.
# A missing index (-1) selects nothing.
J = numpy.expand_dims(low_risk_indices, -1)
mask = grid_index < J
pmass_low_risk = numpy.expand_dims(pmass, axis=1)
pmass_low_risk = (pmass_low_risk * mask).sum(axis=-1)

In [341]:
# Labels with more than 2% of their probability mass beyond the last
# high-risk threshold, printed as "<percent> <name>".
for i, mass in enumerate(pmass_high_risk[:, -1]):
    p = 100 * mass
    if p > 2:
        print(round(p, 1), names[i])

3.7 191: Malignant Neoplasm Of Brain
2.9 424.1: Aortic Valve Disorders
2.7 425: Cardiomyopathy
3.6 426: Conduction Disorders
3.9 427.41: Ventricular Fibrillation
4.5 437.3: Cerebral Aneurysm Nonruptured
2.3 440: Atherosclerosis
4.3 572.4: Hepatorenal Syndrome
5.4 785.51: Cardiogenic Shock


In [342]:
# Labels with more than 2% of their probability mass below the last
# low-risk threshold, printed as "<percent> <name>".
for i, mass in enumerate(pmass_low_risk[:, -1]):
    p = 100 * mass
    if p > 2:
        print(round(p, 1), names[i])

12.8 348.4: Compression Of Brain
3.4 348.5: Cerebral Edema
6.2 424.1: Aortic Valve Disorders
23.4 437.3: Cerebral Aneurysm Nonruptured
10.2 785.51: Cardiogenic Shock
3.5 Age At Least 75


In [339]:
# Rank labels by total flagged probability mass, summed over every low- and
# high-risk threshold, and list them largest first.
scores = pmass_low_risk.sum(axis=1) + pmass_high_risk.sum(axis=1)
ranking = numpy.argsort(-scores)
for i in ranking:
    print(names[i])

437.3: Cerebral Aneurysm Nonruptured
785.51: Cardiogenic Shock
571.1: Acute Alcoholic Hepatitis
191: Malignant Neoplasm Of Brain
427.41: Ventricular Fibrillation
198.3: Brain And Spinal Cord
348.4: Compression Of Brain
410: Acute Myocardial Infarction
572.4: Hepatorenal Syndrome
426: Conduction Disorders
425: Cardiomyopathy
348.31: Metabolic Encephalopathy
428.2: Systolic Heart Failure
430: Subarachnoid Hemorrhage
396: Diseases Of Mitral And Aortic Valves
348.5: Cerebral Edema
410.7: Subendocardial Infarction
Age At Least 75
424.1: Aortic Valve Disorders
431: Intracerebral Hemorrhage
428.0: Congestive Heart Failure Unspecified
428: Heart Failure
571.2: Alcoholic Cirrhosis Of Liver
572.2: Hepatic Coma
155.0: Liver Primary
428.3: Diastolic Heart Failure
155: Malignant Neoplasm Of Liver And Intrahepatic Bile Ducts
414.0: Coronary Atherosclerosis
397.0: Diseases Of Tricuspid Valve
427.1: Paroxysmal Ventricular Tachycardia
440: Atherosclerosis
410-414: Ischemic Heart Disease
427.5: Cardiac 