In [132]:
from adapters import AutoAdapterModel
from transformers import AutoTokenizer, AutoConfig
from datasets import load_dataset
from torch.utils.data import Dataset
import torch
import torch.nn.functional as F
from tqdm.notebook import tqdm
from torch import nn

In [133]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import evaluate
import numpy as np
def compute_metrics(eval_pred, task_name):
    """Compute accuracy, precision, recall and F1 for one set of predictions.

    Args:
        eval_pred: An object that unpacks into ``(logits, labels)``.
        task_name: ``'ast'`` for sequence-level classification (one label per
            example) or ``'ner'`` for token-level tagging (one label per token).

    Returns:
        A dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
        For ``'ast'`` precision/recall/F1 are weighted averages over classes;
        for ``'ner'`` the values are the overall scores reported by seqeval.

    Raises:
        ValueError: If ``task_name`` is neither ``'ast'`` nor ``'ner'``.
            Previously this case fell through and silently returned ``None``.
    """
    # Fail loudly on a typo in the task name instead of handing ``None``
    # back to the caller.
    if task_name not in ('ast', 'ner'):
        raise ValueError(
            f"Unsupported task_name {task_name!r}; expected 'ast' or 'ner'."
        )

    logits, labels = eval_pred

    if task_name == 'ast':
        # The predicted class is the index of the largest logit.
        predictions = np.argmax(logits, axis=-1)
        return {
            'accuracy': accuracy_score(labels, predictions),
            'precision': precision_score(labels, predictions, average='weighted'),
            'recall': recall_score(labels, predictions, average='weighted'),
            'f1': f1_score(labels, predictions, average='weighted'),
        }

    # task_name == 'ner'
    seqeval = evaluate.load("seqeval")
    predictions = np.argmax(logits, axis=2)
    label_list = ["O", "B-test", "I-test", "B-problem", "I-problem", "B-treatment", "I-treatment"]

    true_predictions = []
    true_labels = []
    for prediction, label in zip(predictions, labels):
        # Positions whose label is -100 are excluded from scoring; the same
        # filtered pairs feed both the prediction and the reference sequence.
        kept = [(p, l) for (p, l) in zip(prediction, label) if l != -100]
        true_predictions.append([label_list[p] for (p, _) in kept])
        true_labels.append([label_list[l] for (_, l) in kept])

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    return {
        "accuracy": results["overall_accuracy"],
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
    }

### Load BERT and add adapters

In [136]:
import adapters.composition as ac
from transformers import AutoConfig
from adapters import AutoAdapterModel

# Checkpoint used for both the tokenizer and the base model weights.
pretrained_model_name_or_path = "bert-base-uncased"
# Alternative checkpoint, currently disabled:
#pretrained_model_name_or_path = "emilyalsentzer/Bio_Discharge_Summary_BERT"
# model_max_length=150 sets the tokenizer's maximum input length.
tokenizer  = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, model_max_length=150)
# Register "[entity]" as an additional special token; the example sentences
# in the single-prediction cell wrap the target concept in a pair of these.
special_tokens_dict = {"additional_special_tokens": ["[entity]"]}
# NOTE(review): the positional False is presumably
# `replace_additional_special_tokens` - confirm against the installed
# transformers version. `num_added_toks` is not used in the visible cells.
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict,False)
# Three-class setup; this id2label matches the lbl2id mapping used later to
# encode the dataset labels (present=0, absent=1, possible=2).
model = AutoAdapterModel.from_pretrained(pretrained_model_name_or_path = pretrained_model_name_or_path,
                                         num_labels=3 , id2label = {0: 'PRESENT', 1: 'ABSENT', 2:'POSSIBLE'})
# Resize the embeddings to the tokenizer's current size, which now includes
# the token added above.
model.resize_token_embeddings(len(tokenizer))
# Load the saved assertion adapter together with its head (with_head=True)
# from a local directory. Note that the name `ast` shadows the standard
# library module of the same name within this notebook.
ast = model.load_adapter("./adapter_ast_SeqBnConfig_bert/",with_head=True)
# Alternative adapter directory, currently disabled:
#ast = model.load_adapter("./adapter_ast_pfeiffer_clinicalbert",with_head=True)
model.active_adapters = ast
# Sum the sizes of all base-model parameters that have requires_grad=True.
# NOTE(review): nothing is frozen in this cell, so this likely counts the
# whole base model rather than only adapter weights - confirm.
num_params = sum(p.numel() for p in model.base_model.parameters() if p.requires_grad )
print()
print(f"Number of trainable parameters: {num_params}")
print()
# How to access the labels and the id-to-label mapping of a pretrained head
print(model.get_labels())
print(model.get_labels_dict())

Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Number of trainable parameters: 133818432

['ABSENT', 'POSSIBLE', 'PRESENT']
{1: 'ABSENT', 2: 'POSSIBLE', 0: 'PRESENT'}


In [137]:
from transformers import TrainingArguments, EvalPrediction
from utils import AssertionDatai2b2, ConceptDatai2b2
from datasets import Dataset, DatasetDict
from adapters import AdapterTrainer
from transformers import TrainingArguments, Trainer

# Relative paths to the i2b2 data directories handed to the loader below.
preprocessed_data_path = "Data/preprocessed-data"
train_data_path = "Data/concept_assertion_relation_training_data"
reference_test_data_path = "Data/reference_standard_for_test_data"
test_data_path = "Data/test_data"
# AssertionDatai2b2 comes from the project-local `utils` module.
ast_i2b2 = AssertionDatai2b2(preprocessed_data_path=preprocessed_data_path,
                                 train_data_path=train_data_path,
                                 reference_test_data_path=reference_test_data_path,
                                 test_data_path=test_data_path)
# The loader returns three objects: the Beth+Partners data, the test data and
# the combined data. They are presumably pandas DataFrames (later cells call
# .apply on them and pass them to Dataset.from_pandas) - verify in utils.
beth_and_partners_data, test_data, all_data = ast_i2b2.load_assertion_i2b2_data()
# https://gist.github.com/vincenttzc/ceaa4aca25e53cb8da195f07e7d0af92


def tokenize_function_ast(example):
    """Tokenize the ``new_line`` text of a dataset example.

    Uses the notebook-level ``tokenizer``. The output is truncated and padded
    to the tokenizer's maximum length (``padding="max_length"``).
    """
    text = example["new_line"]
    encoding = tokenizer(text, truncation=True, padding="max_length")
    return encoding
# Integer ids for the assertion labels; they match the id2label mapping
# passed when the model was loaded (present=0, absent=1, possible=2).
lbl2id ={'absent': 1 ,'possible': 2, 'present':0}
# Encode each row's string label as its integer id. This adds a 'label_ids'
# column to the DataFrame in place; a label missing from lbl2id raises KeyError.
beth_and_partners_data['label_ids']= beth_and_partners_data.apply(lambda x: lbl2id[x['label']],axis=1)
# `Dataset` here is datasets.Dataset (imported at the top of this cell), which
# shadows the torch.utils.data.Dataset imported in the first cell. Only the
# label and text columns are carried over.
ds_train = Dataset.from_pandas(beth_and_partners_data[['label_ids','new_line']])

# Tokenize every example, then expose the columns as torch tensors.
tokenized_ds_train = ds_train.map(tokenize_function_ast)
tokenized_ds_train.set_format("torch")

# No training arguments are passed; the trainer is used only to run
# prediction and to score it with the 'ast' branch of compute_metrics.
trainer = AdapterTrainer(model,compute_metrics=lambda p: compute_metrics(p, task_name='ast'))
outputs = trainer.predict(tokenized_ds_train)
# Last expression of the cell: displays the metrics dict.
outputs.metrics


Loading data...

number of beth training records: 73
number of partners training records: 97
number of all test records: 256
total number of all combined records: 426

no labels for record-58
no labels for 262182942
no labels for 0305

number of beth records with labels: 72
number of partners records with labels: 96
number of test records with labels: 255
total number of all combined records with labels: 423

number of beth_and_partners examples: 6529
number of test examples: 11868
total number of combined examples: 18397



Map:   0%|          | 0/6529 [00:00<?, ? examples/s]

  0%|          | 0/817 [00:00<?, ?it/s]

{'test_loss': 0.07683995366096497,
 'test_accuracy': 0.9851432072292847,
 'test_precision': 0.984978346928311,
 'test_recall': 0.9851432072292847,
 'test_f1': 0.9849269848903314,
 'test_runtime': 100.0449,
 'test_samples_per_second': 65.261,
 'test_steps_per_second': 8.166}

In [141]:
# Same steps as for the training split, applied to the test split.
# `lbl2id` and `tokenize_function_ast` are defined in the previous cell.
# Adds a 'label_ids' column to test_data in place.
test_data['label_ids']= test_data.apply(lambda x: lbl2id[x['label']],axis=1)
# Only the label and text columns are carried over into the dataset.
ds_test = Dataset.from_pandas(test_data[['label_ids','new_line']])

# Tokenize every example, then expose the columns as torch tensors.
tokenized_ds_test = ds_test.map(tokenize_function_ast)
tokenized_ds_test.set_format("torch")

# Rebinds the notebook-level `trainer` and `outputs` names from the previous cell.
trainer = AdapterTrainer(model,compute_metrics=lambda p: compute_metrics(p, task_name='ast'))
outputs = trainer.predict(tokenized_ds_test)
# Last expression of the cell: displays the metrics dict.
outputs.metrics

Map:   0%|          | 0/11868 [00:00<?, ? examples/s]

  0%|          | 0/1484 [00:00<?, ?it/s]

{'test_loss': 0.29043903946876526,
 'test_accuracy': 0.9463262554769127,
 'test_precision': 0.944097160699813,
 'test_recall': 0.9463262554769127,
 'test_f1': 0.9438840273670824,
 'test_runtime': 201.9122,
 'test_samples_per_second': 58.778,
 'test_steps_per_second': 7.35}

## Test single example prediction

In [142]:
# Maps a predicted class index back to its assertion label; same mapping as
# the id2label passed when the model was loaded.
id2label = {0: 'PRESENT', 1: 'ABSENT', 2:'POSSIBLE'}
# Hand-written probes; the concept to classify is wrapped in [entity] markers.
sentence = [
    "Patient denies [entity] SOB [entity]",
    "Patient do not have [entity] fever [entity]",
    "had [entity] abnormal ett [entity] and referred for cath",
    "The patient recovered during the night and now denies any [entity] shortness of breath [entity].",
    "Patient with [entity] severe fever [entity].",
    "Patient should abstain from [entity] painkillers [entity]",
]
model.to('cpu')
# Make the inference mode explicit so predictions do not depend on whatever
# state earlier cells left the model in (e.g. dropout active after training).
model.eval()
# Gradients are not needed for prediction; disabling them avoids building an
# autograd graph for every forward pass.
with torch.no_grad():
    for s in sentence:
        tokenized_input = tokenizer(s, return_tensors="pt", padding=True)
        outputs = model(**tokenized_input)
        # One sentence per forward pass, so argmax over dim=1 yields one index.
        predicted_labels = torch.argmax(outputs.logits, dim=1)
        print(id2label[predicted_labels.item()])

ABSENT
ABSENT
PRESENT
ABSENT
PRESENT
ABSENT
