In [None]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# re-imported automatically before each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
from common import *
from PatientVec.Experiments.hyperparam_exps import *

In [None]:
import argparse

# Command-line interface for the experiment; inside the notebook it is parsed from a
# fixed argument list so that `args` exists without a real command line.
parser = argparse.ArgumentParser(description='Run Diagnosis experiments')
parser.add_argument('--data_dir', required=True, type=str)
parser.add_argument('--display', action='store_true')
parser.add_argument('--output_dir', type=str)
parser.add_argument('--mock', action='store_true')

# Data in the current directory, outputs under outputs/, metric display switched on.
args = parser.parse_args(['--data_dir', '.', '--output_dir', 'outputs/', '--display'])

In [None]:
# Build the dataset object from the parsed `args`; the cells below read the vocabulary,
# metric type and train/dev splits from `data`.
from dataloaders import mortality_dataset
data = mortality_dataset(args)

In [None]:
# Fetch the train and dev splits through the project helper.
# NOTE(review): `truncate=90` is presumably a document-length cut-off — confirm in get_basic_data.
train_data, dev_data = get_basic_data(data, structured=True, truncate=90)

In [None]:
# Empty containers for ROC-AUC and PR-AUC results.
results_auc, results_pr = {}, {}

In [None]:
# Empty containers for the l1-norm ROC-AUC and PR-AUC results.
results_auc_l1, results_pr_l1 = {}, {}

In [None]:
# Fit one LR model per regularisation norm ('l1', 'l2', 'l3') and evaluate it on the dev split.
# Every fitted model is kept in `lr_models`, keyed by the norm number: the cell that extracts
# coefficients reads `lr_models[1]`, `lr_models[2]` and `lr_models[3]`, but previously each
# model was overwritten by the next iteration and never stored.
# NOTE(review): whether LR actually supports an 'l3' norm cannot be seen from here — confirm.
lr_models = {}
for l in [1, 2, 3] :
    config = {'vocab' : data.vocab, 'stop_words' : True, 'exp_name' : data.name,
              'type' : data.metrics_type, 'norm' : 'l'+str(l), 'constant_mul' : 1.0}
    lr = LR(config)
    lr.train(train_data)
    metrics = lr.evaluate(dev_data, save_results=True)
    lr_models[l] = lr

In [None]:
# Build the config from the second entry of `attention_configs` for this dataset.
# NOTE(review): `Trainer`, `BasicCT` and `Evaluator` are only imported explicitly in a later
# cell; unless the star imports at the top provide them, this cell fails on a fresh kernel.
e = attention_configs[1]
structured = True
config = e(data, structured=structured, args=args)
# Point the experiment base path at the requested output directory, if one was given.
if args.output_dir is not None :
    config['exp_config']['basepath'] = args.output_dir
# Optional hook: only runs when `args` carries a `modify_config` attribute
# (the parser defined in this notebook does not add one).
if hasattr(args, 'modify_config') :
    config = args.modify_config(config)
print(config)

# Train with the train split; the dev split is passed alongside
# (presumably for validation / checkpoint selection on `data.save_on_metric` — confirm in Trainer).
trainer = Trainer(BasicCT, config, _type=data.metrics_type, display_metrics=args.display)
trainer.train(train_data, dev_data, save_on_metric=data.save_on_metric)

# Re-evaluate on dev from the trained model's directory and save the results.
evaluator = Evaluator(BasicCT, trainer.model.dirname, _type=data.metrics_type, display_metrics=args.display)
_ = evaluator.evaluate(dev_data, save_results=True)
print('='*300)

In [None]:
# Sweep `constant_mul` for the 'l1'-norm LR model and record the dev-split ROC-AUC and
# PR-AUC for each value.
# `lr_models` is only created here when it does not exist yet: this cell never adds to it,
# and unconditionally rebinding it to {} would throw away any models already stored under
# that name before a later cell reads them.
if 'lr_models' not in globals() :
    lr_models = {}
for c in [1.0, 2.0, 5.0, 10.0, 50.0, 100.0, 500.0, 1000.0, 10000, 100000] :
    config = {'vocab' : data.vocab, 'stop_words' : True, 'exp_name' : data.name,
              'type' : data.metrics_type, 'norm' : 'l1', 'constant_mul' : c}
    lr = LR(config)
    lr.train(train_data)
    metrics = lr.evaluate(dev_data, save_results=True)
    results_auc_l1[c] = metrics['roc_auc']
    results_pr_l1[c] = metrics['pr_auc']

In [None]:
# Plot dev ROC-AUC against `constant_mul` for the l1 sweep.
# The original plotted `results_auc`, which no visible cell ever writes to, so the figure
# was empty; `results_auc_l1` is the dict the sweep actually fills.
logx = list(results_auc_l1.keys())
plt.plot(logx, list(results_auc_l1.values()))
plt.xscale('log')  # the swept values span 1 to 100000
plt.xlabel('constant_mul')
plt.ylabel('roc_auc');

In [None]:
# Display PR-AUC per `constant_mul` value from the l1 sweep.
results_pr_l1

In [None]:
# NOTE(review): no visible cell writes to `results_auc` after it is initialised to {},
# so on a fresh run this displays an empty dict.
results_auc

In [None]:
# Pull the first coefficient row of the first estimator out of the models stored
# under keys 1, 2 and 3 of `lr_models`.
l1_feat, l2_feat, l3_feat = (
    lr_models[norm].bow_classifier.estimators_[0].coef_[0]
    for norm in (1, 2, 3)
)

In [None]:
import seaborn as sns

# Overlay the kernel density estimates of the three coefficient vectors on one axes.
# Each curve gets a label and the axes a legend; without them the three overlaid lines
# cannot be told apart.
ax = None
for label, feat in [('l1', l1_feat), ('l2', l2_feat), ('l3', l3_feat)] :
    # ax=None on the first call lets seaborn use the current axes; later calls reuse it.
    ax = sns.kdeplot(sorted(feat), label=label, ax=ax)
ax.set_xlabel('coefficient value')
ax.legend();

In [None]:
# Build a list with one slot per vocabulary index, holding the word for that index;
# slots that `idx2word` does not mention keep the placeholder 0.
vc = [0] * data.vocab.vocab_size
for i, v in data.vocab.idx2word.items() :
    vc[i] = v

In [None]:
# Build the config from the first entry of `attention_configs` for this dataset.
# NOTE(review): an earlier cell already uses `attention_configs` before this import;
# presumably a star import provides it there — confirm it is the same object.
from PatientVec.Experiments.modifiable_config_exp import attention_configs
avg_attn_config = attention_configs[0](data, structured=True, args=args)

In [None]:
# Switch the embedder section of the config over to ELMo: change its type, drop the
# embedding-file entry and attach the ELMo option/weight file paths plus the vocabulary list.
embedder_cfg = avg_attn_config['model']['embedder']
embedder_cfg['type'] = 'elmo_embedder'
del embedder_cfg['embedding_file']
embedder_cfg['elmo_options'] = dict(
    options_file='../../elmo_2x4096_512_2048cnn_2xhighway_options.json',
    weight_file='../../elmo_2x4096_512_2048cnn_2xhighway_weights_PubMed_only.hdf5',
    vocab_to_cache=vc,
)

In [None]:
# Override the `bsize` entry of the common training config with 16.
avg_attn_config['training_config']['common']['bsize'] = 16

In [None]:
from PatientVec.models.Vanilla import ClassificationTrainer as BasicCT
from PatientVec.trainer import Trainer, Evaluator

In [None]:
import logging.config

# Apply a minimal logging configuration whose only effect is to disable the loggers
# that already exist at this point.
logging.config.dictConfig(dict(version=1, disable_existing_loggers=True))

In [None]:
# Rebuild the dev split directly from `data`, filtered with the value 90; this rebinds
# the `dev_data` name that an earlier cell assigned via get_basic_data.
dev_data = data.filter_data_length(data.get_data('dev', structured=True), 90)

In [None]:
# Rebuild the train split directly from `data`, filtered with the value 90; this rebinds
# the `train_data` name that an earlier cell assigned via get_basic_data.
train_data = data.filter_data_length(data.get_data('train', structured=True), 90)

In [None]:
# Train a BasicCT model from `avg_attn_config`, passing both splits to the trainer.
# This rebinds `trainer` from the earlier training cell.
trainer = Trainer(BasicCT, avg_attn_config, _type=data.metrics_type, display_metrics=args.display)
trainer.train(train_data, dev_data, save_on_metric=data.save_on_metric)

In [None]:
from PatientVec.Experiments.evaluate import get_evaluator

In [None]:
# Obtain an evaluator for the experiment identified by this path fragment.
# NOTE(review): presumably this loads a previously saved run under that directory — confirm in get_evaluator.
evaluator = get_evaluator(data, 'Attention/Average(hs=256)+Attention(additive)(hs=128)+Structured/')

In [None]:
# Run the evaluator on the dev split; a later cell reads `output['attentions']`.
output = evaluator.evaluate(dev_data)

In [None]:
# Largest value of each entry in the evaluator's `attentions` output.
max_attentions = list(map(max, output['attentions']))

In [None]:
# Plot the values of `max_attentions` in ascending order.
plt.plot(sorted(max_attentions))

Saving Models
==============

In [None]:
# Generate LaTeX tables for this dataset, restricted to the two metric keys below.
from common import generate_latex_tables
keys_to_use = ['roc_auc', 'pr_auc']
generate_latex_tables(data, keys_to_use)

In [None]:
# Walk the Readmission experiment folders in sorted order and, for every folder whose
# name contains 'Structured', print the name followed by the results of its latest model.
dirname = 'outputs/Readmission/Basic/'
exps = os.listdir(dirname)
for e in sorted(exps) :
    if 'Structured' not in e :
        continue
    print(e)
    latest_model = get_latest_model(os.path.join(dirname, e))
    print_results_from_model(latest_model)

In [None]:
# Walk the Diagnosis experiment folders in sorted order and, for every folder whose
# name contains 'Structured', print the name followed by the results of its latest model.
dirname = 'outputs/Diagnosis/Basic/'
exps = os.listdir(dirname)
for e in sorted(exps) :
    if 'Structured' not in e :
        continue
    print(e)
    latest_model = get_latest_model(os.path.join(dirname, e))
    print_results_from_model(latest_model)