# Analysis of Models using only MIMIC Notes

## Imports & Inits

In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../')

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
%matplotlib inline

import pickle
import numpy as np
import pandas as pd

from pathlib import Path
from scipy import stats
from itertools import combinations
from tqdm import tqdm_notebook as tqdm

from utils.metrics import BinaryAvgMetrics
from utils.plots import *

In [2]:
from lr.args import args as lr_args
from rf.args import args as rf_args
from gbm.args import args as gbm_args


# Argument namespaces of the three base models, keyed by the short model name
# used throughout this notebook.
model_args = {'lr': lr_args, 'rf': rf_args, 'gbm': gbm_args}

# Decision thresholds for the transfer experiments: the outer key names the
# transfer setting, the inner key the model.
transfer_thresholds = {
  'mimic_mlh': {name: margs.mimic_src_thresh for name, margs in model_args.items()},
  'mlh_mimic': {name: margs.mlh_src_thresh for name, margs in model_args.items()},
}

# Per-model decision thresholds used in the cross-testing section.
test_thresholds = {name: margs.mlh_src_test_thresh for name, margs in model_args.items()}

In [3]:
# All inputs and outputs of this notebook live under data/workdir;
# figures are written to data/workdir/figdir.
path = Path('data')
workdir = path / 'workdir'
figdir = workdir / 'figdir'

## Ensembles

In [4]:
def get_ensemble(ensembles, thresh, bams):
  """Build average- and max-probability ensembles from per-model predictions.

  Args:
    ensembles: iterable of model-name sequences; each sequence defines one ensemble.
    thresh: mapping from model name to that model's decision threshold.
    bams: mapping from model name to an object exposing `targs` and `pos_probs`,
      where `pos_probs[i]` holds the positive-class probabilities of run `i`.

  Returns:
    dict with two entries per ensemble, keyed 'avg-<names>' and 'max-<names>'
    (member names joined by '-'). Each value is the tuple
    (targs, preds, probs, threshold); targs are taken from the first member.
  """
  outputs = {}
  for members in ensembles:
    name = '-'.join(members)
    targs = bams[members[0]].targs

    # The ensemble threshold mirrors how the probabilities are combined:
    # mean of member thresholds for 'avg', largest member threshold for 'max'.
    member_thresh = [thresh[m] for m in members]
    avg_thresh = np.array(member_thresh).mean()
    max_thresh = max(member_thresh)

    # One stacked array per run, with the member models along axis 0.
    stacked = [
      np.stack([bams[m].pos_probs[run] for m in members])
      for run in range(len(targs))
    ]

    avg_probs = [run_probs.mean(axis=0) for run_probs in stacked]
    max_probs = [run_probs.max(axis=0) for run_probs in stacked]

    avg_preds = [(p > avg_thresh).astype(np.int64) for p in avg_probs]
    max_preds = [(p > max_thresh).astype(np.int64) for p in max_probs]

    outputs[f'avg-{name}'] = (targs, avg_preds, avg_probs, avg_thresh)
    outputs[f'max-{name}'] = (targs, max_preds, max_probs, max_thresh)

  return outputs

In [5]:
def do_ttest(bams, model1, model2, metric):
  """Run an independent two-sample t-test between two models on one metric.

  Args:
    bams: mapping from model name to a metrics object exposing the per-run
      accessors `sensitivities()`, `specificities()`, `ppvs()`, `aurocs()`,
      `npvs()` and `f1s()`.
    model1: key of the first model in `bams`.
    model2: key of the second model in `bams`.
    metric: one of 'sensitivity', 'specificity', 'ppv', 'auroc', 'npv', 'f1'.

  Returns:
    (t, p): the t statistic rounded to 2 decimals, and the p-value rounded to
    2 decimals but never reported below 0.001.

  Raises:
    ValueError: if `metric` is not one of the supported names.
  """
  # Metric name -> name of the accessor method on the metrics object.
  accessors = {
    'sensitivity': 'sensitivities',
    'specificity': 'specificities',
    'ppv': 'ppvs',
    'auroc': 'aurocs',
    'npv': 'npvs',
    'f1': 'f1s',
  }
  if metric not in accessors:
    # Fail with a clear message instead of an UnboundLocalError further down.
    raise ValueError(f'Unknown metric {metric!r}; expected one of {sorted(accessors)}')

  accessor = accessors[metric]
  x1 = getattr(bams[model1], accessor)()
  x2 = getattr(bams[model2], accessor)()

  t, p = stats.ttest_ind(x1, x2)
  # Floor the rounded p-value so tiny values show as 0.001 rather than 0.0.
  return np.round(t, 2), max(np.round(p, 2), 0.001)

### Cross Testing

In [7]:
# The pickle file holds five objects stored back to back; they are read
# sequentially, so the order of the names below must match the file.
with open(workdir/'vectordir/mlh2mimic.pkl', 'rb') as f:
  mlh2mimic_vec, x_train_mlh, x_test_mimic, y_train_mlh, y_test_mimic = (
    pickle.load(f) for _ in range(5)
  )

# Sanity check: shapes of the training/test matrices and their label vectors.
x_train_mlh.shape, y_train_mlh.shape, x_test_mimic.shape, y_test_mimic.shape

((116400, 60000), (116400,), (38112, 60000), (38112,))

In [14]:
model = 'gbm'

# Load the fitted classifier inside a context manager so the file handle is
# closed deterministically.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted model files.
with open(workdir/model/'models/mlh_full.pkl', 'rb') as f:
  clf = pickle.load(f)

# Column 1 of predict_proba is used as the positive-class probability.
prob = clf.predict_proba(x_test_mimic)
pos_prob = prob[:, 1]

In [15]:
# The threshold comes from test_thresholds, which is defined at the top of the
# notebook. The `thresholds` name is only assigned further down in the Cross
# Training section, so reading it here breaks a fresh Restart & Run All.
threshold = test_thresholds[model]
pred = (pos_prob > threshold).astype(np.int64)
cm = confusion_matrix(y_test_mimic, pred)
# Read as [[tn, fp], [fn, tp]] — assumes rows are true classes and columns are
# predicted classes; TODO confirm against the confusion_matrix in scope.
tn,fp,fn,tp = cm[0][0],cm[0][1],cm[1][0],cm[1][1]
sensitivity = tp/(tp+fn)
specificity = tn/(tn+fp)
ppv = tp/(tp+fp)
npv = tn/(tn+fn)
f1 = (2*ppv*sensitivity)/(ppv+sensitivity)
auroc = roc_auc_score(y_test_mimic, pos_prob)

# Summary table: one row per metric, rounded to 3 decimals, plus the threshold used.
d = {
  'sensitivity': np.round(sensitivity, 3),
  'specificity': np.round(specificity, 3),
  'ppv': np.round(ppv, 3),
  'npv': np.round(npv, 3),
  'f1': np.round(f1, 3),
  'auroc': np.round(auroc, 3),
  'threshold': threshold,
}
metrics = pd.DataFrame(d.values(), index=d.keys(), columns=['Value'])
metrics

Unnamed: 0,Value
sensitivity,0.673
specificity,0.521
ppv,0.316
npv,0.829
f1,0.43
auroc,0.631
threshold,0.46


In [16]:
# Persist this model's cross-testing outputs: targets, full probability matrix
# and thresholded predictions, pickled back to back in that order.
preds_file = workdir/model/'mlh_mimic_test_preds.pkl'
with open(preds_file, 'wb') as f:
  for obj in (y_test_mimic, prob, pred):
    pickle.dump(obj, f)

#### Compute Average Ensembles

In [6]:
models = ['lr', 'rf', 'gbm']
bams = {}

# Each per-model file holds targets, class probabilities and predictions,
# pickled in that order.
for model in models:
  with open(workdir/model/'mlh_mimic_test_preds.pkl', 'rb') as f:
    targs, probs, preds = (pickle.load(f) for _ in range(3))
  # Each array is wrapped in a one-element list; only the positive-class
  # column of the probabilities is kept.
  bams[model] = BinaryAvgMetrics([targs], [preds], [probs[:, 1]])

In [7]:
# Every ensemble of two or more base models, smallest combinations first.
# For models = ['lr', 'rf', 'gbm'] this is:
#   [['lr', 'rf'], ['lr', 'gbm'], ['rf', 'gbm'], ['lr', 'rf', 'gbm']]
ens_models = [
  list(combo)
  for size in range(2, len(models) + 1)
  for combo in combinations(models, size)
]

In [8]:
ensembles = get_ensemble(ens_models, test_thresholds, bams)

# Register each ensemble next to the base models. get_ensemble returns
# (targs, preds, probs, threshold); the trailing threshold is not passed on.
for model, vals in ensembles.items():
  *metric_inputs, _ens_thresh = vals
  bams[model] = BinaryAvgMetrics(*metric_inputs)

In [10]:
final_metrics = {}

# Collect the 'Value' column of each model's averaged-metrics table.
for key in bams.keys():
  # Computed once per model rather than once per row of the table.
  avg_metrics = bams[key].get_avg_metrics()
  final_metrics[key] = [avg_metrics.iloc[i]['Value'] for i in range(len(avg_metrics))]

In [11]:
# Row labels for the collected values — assumed to match the row order of
# get_avg_metrics(); TODO confirm.
metric_names = ['sensitivity', 'specificity', 'ppv', 'auroc', 'npv', 'f1']

# After the transpose, rows are models and columns are metrics.
final_metrics = pd.DataFrame(final_metrics, index=metric_names).transpose()

# For every metric, the model with the highest score and that score.
best_models = pd.DataFrame(
  [(final_metrics[metric].idxmax(), final_metrics[metric].max()) for metric in final_metrics],
  columns=['model', 'value'],
  index=metric_names,
)

In [12]:
# Show the cross-testing metrics table (rows: models and ensembles, columns: metrics).
final_metrics

Unnamed: 0,sensitivity,specificity,ppv,auroc,npv,f1
lr,0.543,0.615,0.316,0.61,0.804,0.4
rf,0.665,0.465,0.29,0.581,0.809,0.404
gbm,0.673,0.521,0.316,0.631,0.829,0.43
avg-lr-rf,0.571,0.587,0.312,0.614,0.806,0.404
max-lr-rf,0.555,0.597,0.311,0.606,0.803,0.399
avg-lr-gbm,0.604,0.591,0.326,0.634,0.82,0.424
max-lr-gbm,0.689,0.505,0.314,0.632,0.832,0.431
avg-rf-gbm,0.682,0.509,0.313,0.625,0.83,0.429
max-rf-gbm,0.673,0.521,0.316,0.631,0.829,0.43
avg-lr-rf-gbm,0.622,0.569,0.321,0.632,0.821,0.424


In [13]:
# Show the best-scoring model and its score for each metric.
best_models

Unnamed: 0,model,value
sensitivity,max-lr-gbm,0.689
specificity,lr,0.615
ppv,avg-lr-gbm,0.326
auroc,avg-lr-gbm,0.634
npv,max-lr-gbm,0.832
f1,max-lr-gbm,0.431


In [23]:
# NOTE(review): hard-coded values with no visible provenance — presumably per-model
# percentages for cross testing (cte) and cross training (ctr); TODO confirm and
# derive them from the saved metrics instead of copying them by hand.
cte = [61, 58.1, 63.1, 61.4, 61.4, 60.6, 63.2, 62.5, 63.1, 63.2, 63.2]
ctr = [74.1, 73.7, 73.2, 74.4, 74.2, 74.4, 74.1, 74.2, 73.7, 74.6, 74.2]

In [24]:
# Relative change from each cte value to the matching ctr value, in percent (2 decimals).
[np.round(100 * (other - base) / base, 2) for base, other in zip(cte, ctr)]

[21.48, 26.85, 16.01, 21.17, 20.85, 22.77, 17.25, 18.72, 16.8, 18.04, 17.41]

### Cross Training

#### Compute Average Ensembles

In [None]:
transfer = 'mlh_mimic'
thresholds = transfer_thresholds[transfer]
models = ['lr', 'rf', 'gbm']

# Each '<transfer>_preds.pkl' file holds targets, class probabilities and
# predictions, pickled in that order.
bams = {}
for model in models:
  with open(workdir/model/f'{transfer}_preds.pkl', 'rb') as f:
    targs, probs, preds = (pickle.load(f) for _ in range(3))
  # Keep only the positive-class column of every probability matrix.
  pos_probs = [prob[:, 1] for prob in probs]
  bams[model] = BinaryAvgMetrics(targs, preds, pos_probs)

In [None]:
# Every ensemble of two or more base models, smallest combinations first.
# For models = ['lr', 'rf', 'gbm'] this is:
#   [['lr', 'rf'], ['lr', 'gbm'], ['rf', 'gbm'], ['lr', 'rf', 'gbm']]
ens_models = [
  list(combo)
  for size in range(2, len(models) + 1)
  for combo in combinations(models, size)
]

In [None]:
ensembles = get_ensemble(ens_models, thresholds, bams)

# Register each ensemble next to the base models. get_ensemble returns
# (targs, preds, probs, threshold); the trailing threshold is not passed on.
for model, vals in ensembles.items():
  *metric_inputs, _ens_thresh = vals
  bams[model] = BinaryAvgMetrics(*metric_inputs)

In [None]:
final_metrics = {}

# Collect the 'Value' column of each model's averaged-metrics table.
for key in bams.keys():
  # Computed once per model rather than once per row of the table.
  avg_metrics = bams[key].get_avg_metrics()
  final_metrics[key] = [avg_metrics.iloc[i]['Value'] for i in range(len(avg_metrics))]

In [None]:
# Row labels for the collected values — assumed to match the row order of
# get_avg_metrics(); TODO confirm.
metric_names = ['sensitivity', 'specificity', 'ppv', 'auroc', 'npv', 'f1']

# After the transpose, rows are models and columns are metrics.
final_metrics = pd.DataFrame(final_metrics, index=metric_names).transpose()

# For every metric, the model with the highest score and that score.
best_models = pd.DataFrame(
  [(final_metrics[metric].idxmax(), final_metrics[metric].max()) for metric in final_metrics],
  columns=['model', 'value'],
  index=metric_names,
)

#### Student's t-Tests

In [None]:
# From here on `models` covers every row of final_metrics (base models and
# ensembles) and `metrics` holds its column names.
models = final_metrics.index.tolist()
metrics = final_metrics.columns.tolist()

In [None]:
ttests = {}

# One entry per unordered model pair, keyed '<m1>:<m2>'; each entry maps a
# metric name to the (t, p) tuple returned by do_ttest.
for m1, m2 in combinations(models, 2):
  ttests[f'{m1}:{m2}'] = {metric: do_ttest(bams, m1, m2, metric) for metric in metrics}

# Rows: model pairs; columns: metrics.
ttests = pd.DataFrame(ttests).transpose()

#### Save to disk

In [None]:
# Write the metrics objects through a context manager so the file is flushed
# and closed before anything tries to read it back.
with open(workdir/f'{transfer}_bams.pkl', 'wb') as f:
  pickle.dump(bams, f)

# Tables are stored as CSV; the two metric tables keep 3 decimals.
final_metrics.to_csv(workdir/f'{transfer}_final_metrics.csv', float_format='%.3f')
best_models.to_csv(workdir/f'{transfer}_best_models.csv', float_format='%.3f')
ttests.to_csv(workdir/f'{transfer}_ttests.csv')

## Results

### Cross Testing

In [None]:
# NOTE(review): this binds a single BinaryAvgMetrics object to `bams`, which the rest of
# the notebook treats as a dict keyed by model name. It also relies on `y_test_mimic`,
# `pred` and `pos_prob` left over from the single-model cross-testing cells, and the
# next cell rebinds `bams` straight away.
bams = BinaryAvgMetrics([y_test_mimic], [pred], [pos_prob])

In [None]:
# Rebuild the per-model metrics from the saved '<transfer>_preds.pkl' files;
# `models` and `transfer` must already be defined by earlier cells.
bams = {}
for model in models:
  with open(workdir/model/f'{transfer}_preds.pkl', 'rb') as f:
    targs, probs, preds = (pickle.load(f) for _ in range(3))
  # Keep only the positive-class column of every probability matrix.
  pos_probs = [prob[:, 1] for prob in probs]
  bams[model] = BinaryAvgMetrics(targs, preds, pos_probs)

In [None]:
thresholds = transfer_thresholds[transfer]
models = ['lr', 'rf', 'gbm']

# Rebuild the per-model metrics from the saved '<transfer>_preds.pkl' files;
# `transfer` must already be defined by an earlier cell.
bams = {}
for model in models:
  with open(workdir/model/f'{transfer}_preds.pkl', 'rb') as f:
    targs, probs, preds = (pickle.load(f) for _ in range(3))
  # Keep only the positive-class column of every probability matrix.
  pos_probs = [prob[:, 1] for prob in probs]
  bams[model] = BinaryAvgMetrics(targs, preds, pos_probs)

### Cross Training

In [None]:
transfer = 'mlh_mimic'

# Read the saved metrics objects through a context manager so the file handle
# is closed once loading finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(workdir/f'{transfer}_bams.pkl', 'rb') as f:
  bams = pickle.load(f)

# The first CSV column holds the row labels of each saved table.
final_metrics = pd.read_csv(workdir/f'{transfer}_final_metrics.csv', index_col=0)
best_models = pd.read_csv(workdir/f'{transfer}_best_models.csv', index_col=0)
ttests = pd.read_csv(workdir/f'{transfer}_ttests.csv', index_col=0)

In [None]:
# Iterator that the next cell advances to step through the models one at a time.
itr = iter(bams)
# Show which models are available.
bams.keys()

In [None]:
# Each run of this cell advances `itr` to the next model and shows its averaged
# metrics. conf=0.95 is passed to get_avg_metrics (presumably a confidence level — TODO confirm).
# next() raises StopIteration once `itr` is exhausted; re-create `itr` to start over.
model = next(itr)
print(model)
bams[model].get_avg_metrics(conf=0.95)

In [None]:
# Show the metrics table loaded from '<transfer>_final_metrics.csv'.
final_metrics

In [None]:
# Show the best-model table loaded from '<transfer>_best_models.csv'.
best_models

In [None]:
# Print the t-test table as LaTeX source so it can be copied verbatim.
print(ttests.to_latex())

## Box Plot

In [None]:
# When True, the plotting cells below also write their figures to `figdir`.
save = True

In [None]:
transfer = 'mlh_mimic'

# Read the saved metrics objects through a context manager so the file handle
# is closed once loading finishes.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(workdir/f'{transfer}_bams.pkl', 'rb') as f:
  bams = pickle.load(f)

final_metrics = pd.read_csv(workdir/f'{transfer}_final_metrics.csv', index_col=0)
best_models = pd.read_csv(workdir/f'{transfer}_best_models.csv', index_col=0)
ttests = pd.read_csv(workdir/f'{transfer}_ttests.csv', index_col=0)

# Upper-case the model names by building a new dict. Popping and inserting
# keys while iterating over the same dict is unsafe and can raise RuntimeError.
bams = {k.upper(): bam for k, bam in bams.items()}

# Shorter display names for the three-model ensembles.
bams['AVG-ALL'] = bams.pop('AVG-LR-RF-GBM')
bams['MAX-ALL'] = bams.pop('MAX-LR-RF-GBM')

In [None]:
itr = iter(bams.keys())

# Build one frame per model: a leading 'model' column followed by one column
# for each (name, values) pair produced by yield_metrics().
metrics = {}
for md in itr:
  df = pd.DataFrame()
  for k, m in bams[md].yield_metrics():
    df[k] = m
  df['model'] = md
  # Rotate the last column (the 'model' label just added) to the front.
  cols = df.columns.tolist()
  cols = cols[-1:] + cols[:-1]
  df = df[cols]
  metrics[md] = df

# Stack the per-model frames for plotting.
plot_df = pd.concat(metrics.values())

In [None]:
# Column of plot_df to show on the y axis.
met = 'AUC'

# One box per model; the x-axis label is blanked because the tick labels
# already name the models.
fig, ax = plt.subplots(figsize=(15, 8))
sns.boxplot(data=plot_df, x='model', y=met, ax=ax)
ax.set_xlabel('')

if save:
  fig.savefig(figdir/f'{transfer}_{met.lower()}_box_plot.pdf', dpi=300)

## Mean AUC

In [None]:
def get_mean_tprs(bams, base_fpr):
  """Average each model's ROC curve over its runs on a common FPR grid.

  Args:
    bams: mapping from model name to an object exposing `targs` and
      `pos_probs`, iterated pairwise as one (targets, probabilities) per run.
    base_fpr: 1-D array of false-positive rates at which every run's curve
      is evaluated.

  Returns:
    dict mapping each model name to the mean true-positive rate at `base_fpr`,
    averaged over that model's runs.
  """
  mean_tprs = {}
  for model, bam in bams.items():
    tprs = []
    for targs, probs in zip(bam.targs, bam.pos_probs):
      # roc_curve is expected in the notebook namespace (presumably
      # sklearn.metrics.roc_curve — TODO confirm).
      fpr, tpr, _ = roc_curve(targs, probs)
      # Resample onto the shared grid with np.interp, which is already
      # imported, instead of relying on a bare `interp` name.
      tpr = np.interp(base_fpr, fpr, tpr)
      # Force every curve to start at the origin.
      tpr[0] = 0.0
      tprs.append(tpr)

    mean_tprs[model] = np.array(tprs).mean(axis=0)

  return mean_tprs

In [None]:
# Which models to plot: '' (base models only), 'avg_', 'max_' or 'all_'.
# The value is also used as a prefix in the saved figure's file name.
des = 'all_'

if not des:
  # Base models are the keys without a '-' in their name.
  plot_bams = {k: bams[k] for k in bams.keys() if '-' not in k}
  des = ''
  names = list(plot_bams.keys())
elif des == 'avg_':
  plot_bams = {k: bams[k] for k in bams.keys() if 'AVG' in k}
  # Drop the 4-character 'AVG-' prefix from the legend names.
  names = [name[4:] for name in plot_bams.keys()]
elif des == 'max_':
  plot_bams = {k: bams[k] for k in bams.keys() if 'MAX' in k}
  # Drop the 4-character 'MAX-' prefix from the legend names.
  names = [name[4:] for name in plot_bams.keys()]
elif des == 'all_':
  plot_bams = bams
  names = list(plot_bams.keys())
else:
  # Fail loudly rather than plot stale values left over from an earlier run.
  raise ValueError(f"Unknown des {des!r}; expected '', 'avg_', 'max_' or 'all_'")

# Legend entries have the form '<name> (<average AUROC>)'.
aucs = [bam.auroc_avg() for bam in plot_bams.values()]
legends = [f'{name} ({auc})' for name, auc in zip(names, aucs)]

legends

In [None]:
# Common false-positive-rate grid on which every model's mean ROC curve is drawn.
base_fpr = np.linspace(0, 1, 100)
mean_tprs = get_mean_tprs(plot_bams, base_fpr)

fig, ax = plt.subplots(1, 1, figsize=(11, 8))
for model, mean_tpr in mean_tprs.items():
  ax.plot(base_fpr, mean_tpr)
# Dotted diagonal as the chance-level reference line.
ax.plot([0, 1], [0, 1], linestyle=':')
# The on/off flag is passed positionally: the `b` keyword was renamed to
# `visible` in Matplotlib 3.5 and later removed, so `b=True` fails on current releases.
ax.grid(True, which='major', color='#d3d3d3', linewidth=1.0)
ax.grid(True, which='minor', color='#d3d3d3', linewidth=0.5)
ax.set_ylabel('Sensitivity')
ax.set_xlabel('1 - Specificity')
# Legend labels are matched to the model curves in plotting order.
ax.legend(legends)

if save:
  fig.savefig(figdir/f'{transfer}_{des}mean_auc.pdf', dpi=300)