In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
from common import *

In [None]:
import argparse
# Command-line style configuration for the notebook. The options are declared
# in a table (in registration order) and then parsed from a fixed argument
# list, since a notebook has no real command line to read from.
_cli_spec = [
    ("--data_dir", dict(type=str, required=True)),
    ('--display', dict(dest='display', action='store_true')),
    ("--output_dir", dict(type=str)),
    ("--mock", dict(dest='mock', action='store_true')),
]

parser = argparse.ArgumentParser(description='Run Diagnosis experiments')
for _flag, _options in _cli_spec :
    parser.add_argument(_flag, **_options)

# Values used for this notebook session.
_notebook_argv = ['--data_dir=.', '--output_dir=outputs/', '--display']
args = parser.parse_args(_notebook_argv)

In [None]:
from PatientVec.models.Vanilla import ClassificationTrainer as BasicCT
from PatientVec.models.Hierarchical import ClassificationTrainer as HierCT
from PatientVec.trainer import Trainer, Evaluator
from PatientVec.Experiments.modifiable_config_exp import vanilla_configs, attention_configs, hierarchical_configs, structured_configs

from PatientVec.Experiments.hyperparam_exps import get_basic_data
from PatientVec.models.baselines.LR import LR, LDA

In [None]:
from dataloaders import hip_dataset, knee_dataset

In [None]:
# "+Balanced" experiments on the hip-surgery datasets for yr = 1, 2, 3.
#
# For every year, each attention config and then each vanilla config is built,
# switched to batch size 8 with balanced=True / class_weight=False, trained for
# 15 iterations and evaluated on the dev split with results saved.
#
# Each entry below pairs a list of config builders with the value written to
# config['training_config']['type']; None leaves the builder's own value alone.
# NOTE(review): the vanilla configs are not switched to 'RMSprop' here, whereas
# the "+Replicate" cell does switch them -- confirm the asymmetry is intended.
#
# A logistic-regression baseline (LR trained on data truncated at 100) used to
# sit at the top of the year loop; it was entirely commented out, so it is not
# reproduced here.
config_groups = [
    (attention_configs, 'RMSprop'),
    (vanilla_configs, None),
]

for yr in [1, 2, 3] :
    data = hip_dataset(args, yr=yr)
    train_data, dev_data = get_basic_data(data, structured=False, truncate=98)

    # Trainer and Evaluator are both constructed with the dataset's metric type.
    metric_kwargs = dict(_type=data.metrics_type, display_metrics=True)

    for config_builders, training_type in config_groups :
        for e in config_builders :
            config = e(data, structured=False, args=args)
            if args.output_dir is not None :
                config['exp_config']['basepath'] = args.output_dir

            common_cfg = config['training_config']['common']
            common_cfg['bsize'] = 8
            common_cfg['class_weight'] = False
            common_cfg['balanced'] = True

            # Tag the experiment name so these runs are distinguishable.
            config['exp_config']['exp_name'] += '+Balanced'
            if training_type is not None :
                config['training_config']['type'] = training_type
            print(config)

            trainer = Trainer(BasicCT, config, **metric_kwargs)
            trainer.train(train_data, dev_data, n_iters=15, save_on_metric=data.save_on_metric)

            # The evaluator is pointed at the directory reported by the trained model.
            evaluator = Evaluator(BasicCT, trainer.model.dirname, **metric_kwargs)
            _ = evaluator.evaluate(dev_data, save_results=True)
            print('-'*300)

    print('='*500)

In [None]:
# "+Replicate" experiments on the hip-surgery datasets for yr = 1, 2, 3.
#
# Each vanilla config is built with batch size 8, has the predictor's
# 'replicate' flag turned on with alpha = 0.3, uses training type 'RMSprop',
# is trained for 15 iterations and is then evaluated on the dev split.
#
# A logistic-regression baseline (LR trained on data truncated at 100) used to
# sit at the top of the year loop; it was entirely commented out, so it is not
# reproduced here.
for yr in (1, 2, 3) :
    data = hip_dataset(args, yr=yr)
    train_data, dev_data = get_basic_data(data, structured=False, truncate=98)

    # Trainer and Evaluator are both constructed with the dataset's metric type.
    metric_kwargs = dict(_type=data.metrics_type, display_metrics=True)

    for build_config in vanilla_configs :
        config = build_config(data, structured=False, args=args)
        if args.output_dir is not None :
            config['exp_config']['basepath'] = args.output_dir
        config['training_config']['common']['bsize'] = 8

        predictor_cfg = config['model']['predictor']
        predictor_cfg['replicate'] = True
        predictor_cfg['alpha'] = 0.3

        # Tag the experiment name so these runs are distinguishable.
        config['exp_config']['exp_name'] += '+Replicate'
        config['training_config']['type'] = 'RMSprop'
        print(config)

        trainer = Trainer(BasicCT, config, **metric_kwargs)
        trainer.train(train_data, dev_data, n_iters=15, save_on_metric=data.save_on_metric)

        # The evaluator is pointed at the directory reported by the trained model.
        evaluator = Evaluator(BasicCT, trainer.model.dirname, **metric_kwargs)
        _ = evaluator.evaluate(dev_data, save_results=True)
        print('-' * 300)

    print('=' * 500)

In [None]:
# Build the hip-surgery dataset for each year in turn. Nothing else is done
# with the intermediate results, so `data` ends up bound to the yr=3 dataset.
# NOTE(review): presumably run for a side effect of hip_dataset -- confirm.
for yr in range(1, 4) :
    data = hip_dataset(args, yr=yr)

Saving Models
==============

In [None]:
from common import generate_latex_tables
# Keys handed to generate_latex_tables for every knee-surgery dataset.
# NOTE(review): only knee_dataset is processed in this cell, while the result
# cells further down read both HipSurgery_* and KneeSurgery_* CSV files --
# confirm the hip tables are produced elsewhere.
keys_to_use = ['accuracy', 'roc_auc', 'pr_auc']

for yr in range(1, 4) :
    data = knee_dataset(args, yr=yr)
    generate_latex_tables(data, keys_to_use)

In [None]:
# Hip-surgery results, one HTML table per model family.
# For each family the per-year CSVs are indexed by their 'Method' column and
# joined column-wise with the year as a column key. The column levels are then
# swapped and sorted so the original CSV column is the outer level and the
# years sit side by side underneath it.
for model in ['Attention', 'baselines', 'Basic'] :
    years = [1, 2, 3]
    frames = []
    for yr in years :
        csv_path = 'Text-encoding-EHR/results/HipSurgery_' + str(yr) + '/' + model + '.csv'
        frames.append(pd.read_csv(csv_path).set_index('Method'))

    yr_df = pd.concat(frames, axis=1, keys=years)
    yr_df.columns = yr_df.columns.swaplevel(0, 1)
    yr_df = yr_df.sort_index(axis=1, level=0)
    display(HTML(yr_df.to_html()))

In [None]:
# Knee-surgery results, one HTML table per model family.
# For each family the per-year CSVs are indexed by their 'Method' column and
# joined column-wise with the year as a column key. The column levels are then
# swapped and sorted so the original CSV column is the outer level and the
# years sit side by side underneath it.
for model in ['Attention', 'baselines', 'Basic'] :
    years = [1, 2, 3]
    frames = []
    for yr in years :
        csv_path = 'Text-encoding-EHR/results/KneeSurgery_' + str(yr) + '/' + model + '.csv'
        frames.append(pd.read_csv(csv_path).set_index('Method'))

    yr_df = pd.concat(frames, axis=1, keys=years)
    yr_df.columns = yr_df.columns.swaplevel(0, 1)
    yr_df = yr_df.sort_index(axis=1, level=0)
    display(HTML(yr_df.to_html()))