In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from common import *
from PatientVec.Experiments.hyperparam_exps import *

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [3]:
import argparse

# Emulate a command-line invocation inside the notebook: declare the options,
# then parse a fixed argument list instead of the process arguments.
parser = argparse.ArgumentParser(description='Run Diagnosis experiments')
parser.add_argument('--data_dir', type=str, required=True)   # the only mandatory option
parser.add_argument('--display', action='store_true')        # switch, False unless given
parser.add_argument('--output_dir', type=str)                # None when omitted
parser.add_argument('--mock', action='store_true')           # switch, False unless given

args = parser.parse_args([
    '--data_dir', '.',
    '--output_dir', 'outputs/',
    '--display',
])

In [None]:
# Load the dataset object used by the cells below; the loader receives the parsed args.
from dataloaders import readmission_dataset, mortality_dataset
data = readmission_dataset(args)
# Alternative dataset, currently disabled:
# data = mortality_dataset(args, _type='30day')

In [None]:
# Invoke the 'ts_experiments' entry of experiment_types once per value of n;
# the value is handed to the experiment through args.n.
run_ts_experiments = experiment_types['ts_experiments']
for n in (500, 1000, 1500, 2000, 3000) :
    args.n = n
    run_ts_experiments(data, args)

In [None]:
# `structured` is passed to get_basic_data here and to the SRU config builder in a later cell.
structured = True
# NOTE(review): truncate=90 is presumably a length cutoff — confirm in get_basic_data.
train_data, dev_data = get_basic_data(data, structured=structured, truncate=90)

In [None]:
# Drop a single training example by position. The comprehension keeps the
# remaining indices in ascending order explicitly, instead of relying on the
# iteration order of a set difference.
# NOTE(review): the reason position 1249 is excluded is not recorded — document it.
# NOTE: this cell rebinds train_data, so re-running it filters the already
# filtered data a second time.
excluded_idx = 1249
idxs = [i for i in range(len(train_data.X)) if i != excluded_idx]
train_data = train_data.filter(idxs)

In [None]:
# Build the config produced by the first entry of sru_configs for this dataset.
config = sru_configs[0](data, structured=structured, args=args)
# Override the experiment base path when an output directory was supplied.
if args.output_dir is not None :
    config['exp_config']['basepath'] = args.output_dir
print(config)

# Train BasicCT with this config (n_iters=10); dev_data is passed along with the training data.
trainer = Trainer(BasicCT, config, _type=data.metrics_type, display_metrics=args.display)
trainer.train(train_data, dev_data, n_iters=10, save_on_metric=data.save_on_metric)

# Evaluate on the dev split using the trainer's model directory (trainer.model.dirname).
evaluator = Evaluator(BasicCT, trainer.model.dirname, _type=data.metrics_type, display_metrics=args.display)
_ = evaluator.evaluate(dev_data, save_results=True)
# Visual separator in the cell output.
print('='*300)

In [None]:
# Rebuild both splits from `data`; this replaces the train_data/dev_data bound by earlier cells.
train_data, dev_data = get_basic_data(data, structured=True, truncate=90)

In [None]:
# Score containers; results_auc is plotted and displayed by later cells.
# NOTE(review): no visible cell writes into these two dicts — confirm where they are filled.
results_auc = {}
results_pr = {}

In [None]:
# Filled by the l1-norm constant_mul sweep: ROC AUC and PR AUC keyed by the constant_mul value.
results_auc_l1 = {}
results_pr_l1 = {}

In [None]:
for l in [1, 2, 3] :
    config = {'vocab' : data.vocab, 'stop_words' : True, 'exp_name' : data.name, 
              'type' : data.metrics_type, 'norm' : 'l'+str(l), 'constant_mul' : 1.0}
    lr = LR(config)
    lr.train(train_data)
    metrics = lr.evaluate(dev_data, save_results=True)

In [None]:
lr_models = {}
for c in [1.0, 2.0, 5.0, 10.0, 50.0, 100.0, 500.0, 1000.0, 10000, 100000] :
    config = {'vocab' : data.vocab, 'stop_words' : True, 'exp_name' : data.name, 
              'type' : data.metrics_type, 'norm' : 'l1', 'constant_mul' : c}
    lr = LR(config)
    lr.train(train_data)
    metrics = lr.evaluate(dev_data, save_results=True)
    results_auc_l1[c] = metrics['roc_auc']
    results_pr_l1[c] = metrics['pr_auc']

In [None]:
# Plot the values of results_auc against its keys.
# NOTE(review): `plt` is not imported in any visible cell — presumably it comes from a star import; confirm.
logx = list(results_auc.keys())
plt.plot(logx, list(results_auc.values()))

In [None]:
# Display the PR AUC recorded per constant_mul in the l1 sweep.
results_pr_l1

In [None]:
# Display the contents of results_auc.
results_auc

In [None]:
def _first_estimator_coef(model) :
    """Return row 0 of coef_ from the first estimator in model.bow_classifier."""
    return model.bow_classifier.estimators_[0].coef_[0]

# Coefficient vectors of the models stored under keys 1, 2 and 3 of lr_models.
l1_feat = _first_estimator_coef(lr_models[1])
l2_feat = _first_estimator_coef(lr_models[2])
l3_feat = _first_estimator_coef(lr_models[3])

In [None]:
import seaborn as sns

# Overlay the kernel density estimate of each coefficient vector on one set of
# axes. Every curve gets a label and a legend is drawn, because three
# unlabelled curves on the same axes cannot be told apart.
ax = None   # None lets the first call pick the current axes; later calls reuse them
for label, feat in (('l1_feat', l1_feat), ('l2_feat', l2_feat), ('l3_feat', l3_feat)) :
    ax = sns.kdeplot(sorted(feat), label=label, ax=ax)
ax.legend();

In [None]:
# Rebind train_data to the structured 'train' split passed through data.filter_data_length with 90.
train_data = data.filter_data_length(data.get_data('train', structured=True), 90)

In [None]:
# Point args at a different data directory (a relative path) before loading the next dataset.
args.data_dir = '../../../SurgeryData/PatientVec/'

In [None]:
# Switch datasets: from here on `data` refers to the object returned by hip_dataset.
from dataloaders import hip_dataset
data = hip_dataset(args)

In [None]:
from PatientVec.Experiments.evaluate import get_evaluator
# Rebuild the splits for the current `data`, this time with structured=False.
train_data, dev_data = get_basic_data(data, structured=False, truncate=90)

In [None]:
# Obtain an evaluator for the experiment identified by 'Basic/LSTM(hs=128)/' and run it on the dev split.
# NOTE(review): presumably this loads a previously trained model — confirm in get_evaluator.
evaluator = get_evaluator(data, 'Basic/LSTM(hs=128)/')
output = evaluator.evaluate(dev_data)

In [None]:
# Fit an l2-norm LR model (constant_mul 1.0) on train_data, evaluate it on
# dev_data with save_results=True, and keep its dev_data predictions.
config = dict(
    vocab=data.vocab,
    stop_words=True,
    exp_name=data.name,
    type=data.metrics_type,
    norm='l2',
    constant_mul=1.0,
)
lr = LR(config)
lr.train(train_data)
metrics = lr.evaluate(dev_data, save_results=True)
pred = lr.predict(dev_data)

In [None]:
# Pull one column from each model's predictions and from the dev labels.
# NOTE(review): column 0 is taken from the LSTM predictions but column 1 from the
# LR predictions — presumably the LR output has one column per class; confirm.
lstm_y = output['predictions'][:, 0]
lr_y = pred[:, 1]
true_y = np.array(dev_data.y)[:, 0]

In [None]:
# Scatter lstm_y (x) against lr_y (y), coloured by true_y.
plt.scatter(lstm_y, lr_y, s=1, c=true_y)

In [None]:
from sklearn.metrics import *

In [None]:
# Split the dev examples into four bins of width 0.25 by their LR score and,
# per bin, keep the scores/labels and the ROC AUC of both models.
bin_width = 0.25
bins = [0.0, 0.25, 0.5, 0.75]
results = {}
rocs = {}
for b in bins :
    upper = b + bin_width
    # Bins are half-open [b, upper) except the last, which is closed so that a
    # score of exactly 1.0 is not silently dropped.
    below_top = (lr_y <= upper) if b == bins[-1] else (lr_y < upper)
    idx = np.where(np.logical_and(lr_y >= b, below_top))[0]
    results[b] = {'lr' : lr_y[idx], 'lstm' : lstm_y[idx], 'true' : true_y[idx]}
    if np.unique(true_y[idx]).size < 2 :
        # ROC AUC is undefined for an empty bin or a bin holding a single class;
        # record NaN instead of letting roc_auc_score fail on it.
        rocs[b] = {'lr' : float('nan'), 'lstm' : float('nan')}
    else :
        rocs[b] = {'lr' : roc_auc_score(true_y[idx], lr_y[idx]), 'lstm' : roc_auc_score(true_y[idx], lstm_y[idx])}

In [None]:
# Partition the dev examples by LR score: the "end" group has scores below 0.25
# or above 0.75, the "middle" group everything in [0.25, 0.75]. The middle range
# is inclusive so that scores of exactly 0.25 or 0.75 land in one of the groups
# instead of neither.
idx_end = np.where(np.logical_or(lr_y < 0.25, lr_y > 0.75))[0]
idx_middle = np.where(np.logical_and(lr_y >= 0.25, lr_y <= 0.75))[0]

In [None]:
# ROC AUC of (LR, LSTM) on the examples selected by idx_end.
roc_auc_score(true_y[idx_end], lr_y[idx_end]), roc_auc_score(true_y[idx_end], lstm_y[idx_end])

In [None]:
# ROC AUC of (LR, LSTM) on the examples selected by idx_middle.
roc_auc_score(true_y[idx_middle], lr_y[idx_middle]), roc_auc_score(true_y[idx_middle], lstm_y[idx_middle])

Saving Models
==============

In [None]:
from common import generate_latex_tables
# Metric keys handed to generate_latex_tables for the current `data`.
keys_to_use = ['roc_auc', 'pr_auc']
generate_latex_tables(data, keys_to_use)

In [None]:
# Walk the entries of dirname in sorted order and, for every entry whose name
# contains 'Structured', print the name followed by the results of the model
# returned by get_latest_model for that entry.
dirname = 'outputs/Readmission/Basic/'
exps = os.listdir(dirname)
for e in sorted(exps) :
    if 'Structured' not in e :
        continue
    print(e)
    print_results_from_model(get_latest_model(os.path.join(dirname, e)))

In [None]:
# Walk the entries of dirname in sorted order and, for every entry whose name
# contains 'Structured', print the name followed by the results of the model
# returned by get_latest_model for that entry.
dirname = 'outputs/Diagnosis/Basic/'
exps = os.listdir(dirname)
for e in sorted(exps) :
    if 'Structured' not in e :
        continue
    print(e)
    print_results_from_model(get_latest_model(os.path.join(dirname, e)))