In [1]:
import pandas as pd
import numpy as np

from hydra import initialize, compose
from omegaconf import OmegaConf

import mlflow

import os
os.chdir('../../../')

from src.models import train_tf as ttf
from src.models import train_helpers as th
from src.models import test_tf as testtf

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_params(params_dict):
    res_dict = {}
    for key in params_dict:
        val = params_dict[key]
        if val == 'True':
            res_dict[key] = True
        elif val == 'False': 
            res_dict[key] = False
        else:
            if val.find('.') > -1:
                try:
                    res_dict[key] = float(val)
                except:
                    res_dict[key] = val
            else:
                try:
                    res_dict[key] = int(val)
                except:
                    res_dict[key] = val
    return res_dict

In [46]:
with initialize(version_base=None, config_path='../../../conf/'):
    cfg = compose(config_name='config')
cfg = cfg.test_tf

cfg.mlflow_source_experiment_name = 'Q2i_tf_bio-clinicalbert_tpe_100t_220925_121844'
cfg.mlflow_run_id = '52e6735eafa542238a1fda2ee07c3b08'
cfg.target_var = 'Q2i'

print(cfg)

{'mlflow_dir': '${train_tf.mlflow_dir}', 'mlflow_tracking_dir': '${train_tf.mlflow_tracking_dir}', 'model': '${train_tf.model}', 'mlflow_source_experiment_name': 'Q2i_tf_bio-clinicalbert_tpe_100t_220925_121844', 'mlflow_run_id': '52e6735eafa542238a1fda2ee07c3b08', 'train_path': '${split_train_test.train_path}', 'test_path': '${split_train_test.test_path}', 'text_var': '${train_tf.text_var}', 'target_var': 'Q2i', 'invert_target': '${train_tf.invert_target}', 'qual_exclude_level4': '${train_tf.qual_exclude_level4}', 'mlflow_target_experiment_name': '${train_tf.target_var}_tf_test', 'conda_yaml_path': '${train_tf.conda_yaml_path}'}


In [47]:
mlflow.set_tracking_uri(cfg.mlflow_tracking_dir)

# Load the model parameters from cross-validation
exper = mlflow.get_experiment_by_name(cfg.mlflow_source_experiment_name)
run = mlflow.get_run(cfg.mlflow_run_id)
run_params = run.data.params
train_cfg = OmegaConf.create(load_params(run_params))

Xtr, ytr = th.load_data(train_cfg, train=True)
model = ttf.train_tf_model(train_cfg, Xtr, ytr)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model

In [48]:
hftfmodel = model.model

In [49]:
tok = model.tokenizer

In [50]:
from transformers_interpret import SequenceClassificationExplainer
cls_explainer = SequenceClassificationExplainer(
    hftfmodel,
    tok)

In [51]:
df = pd.read_pickle('data/processed/train.pkl')

In [64]:
s = df.comment.iloc[890]
s

'Co-morbid 67F, presenting with syncope. _x000D_\nBased on the initially history and exam an episode of symptomatic bradycardia seemed to be the possible cause._x000D_\n_x000D_\n the trainee was somewhat busy at the time and had employed pattern fracture1 thinking, and has missed a couple of things that pattern fracture2 thinking would have identified. I prompted  the trainee to consider other urgent causes of syncope. GI bleed had not initially been considered (although would have been identified based on the BW ordered). _x000D_\n_x000D_\nWe discussed "thinking fast and thinking slow" and how we can all fall into  thinking fast at times. We also discussed time points of when to employ a thinking slow. I.e. 1. after initial history and physical exam when something doesn\'t quite make sense. 2. when reviewing initial w/u and management and determining further w/u and management 3. Prior to patient disposition.'

In [65]:
word_attributions = cls_explainer(s)

RuntimeError: CUDA out of memory. Tried to allocate 34.00 MiB (GPU 0; 7.79 GiB total capacity; 6.50 GiB already allocated; 14.06 MiB free; 7.08 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [63]:
cls_explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.99),LABEL_1,1.0,[CLS] blank [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.99),LABEL_1,1.0,[CLS] blank [SEP]
,,,,


In [45]:
cls_explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
5.0,LABEL_5 (0.91),LABEL_5,3.99,[CLS] the train ##ee generated good images of the abdomen for free fluid . I reminded their to avoid calling ' no free fluid ' until they was completely done the sweep . I prompted their to ensure the patient was lying flat when they was checking for in ##tra - abdominal fluid as this increases the sensitivity of the test . They also looked for lung slide - I prompted their to decrease their depth so they could focus in on the p ##le ##ural line when scanning to see comet tails and p ##le ##ural sliding . [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
5.0,LABEL_5 (0.91),LABEL_5,3.99,[CLS] the train ##ee generated good images of the abdomen for free fluid . I reminded their to avoid calling ' no free fluid ' until they was completely done the sweep . I prompted their to ensure the patient was lying flat when they was checking for in ##tra - abdominal fluid as this increases the sensitivity of the test . They also looked for lung slide - I prompted their to decrease their depth so they could focus in on the p ##le ##ural line when scanning to see comet tails and p ##le ##ural sliding . [SEP]
,,,,
