In [3]:
#!pip install seqeval
#!pip install transformers

In [2]:
import os
import pandas as pd
import math
import numpy as np
from tqdm import tqdm, trange
from seqeval.metrics import classification_report, accuracy_score, f1_score
import torch
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AutoModel, AutoConfig, AutoTokenizer
from transformers import AdamW
from transformers import AutoModelForSequenceClassification, BertForSequenceClassification

In [26]:
# no of classifier: present, not-present
num_labels = 2

In [38]:
# no of classifier: present, possible, not-present
num_labels = 3

In [63]:
# no of classifier: present, possible, conditional, not-present
num_labels = 4

In [None]:
# no of classifier: present, possible, conditional, associated_with_someone_else, not-present
num_labels = 5

In [64]:
MODEL_CLASSES = {
  'bert': (AutoConfig, BertForSequenceClassification, AutoTokenizer),
}
MODEL_ADDRESS = 'emilyalsentzer/Bio_ClinicalBERT'
config_class, model_class, tokenizer_class = MODEL_CLASSES['bert']
model_config = config_class.from_pretrained(MODEL_ADDRESS, num_labels=num_labels)
tokenizer = tokenizer_class.from_pretrained(MODEL_ADDRESS, do_lower_case=False)
model = model_class.from_pretrained(MODEL_ADDRESS, config=model_config)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model

In [28]:
output_dir = './trained_models/2_label_model/'
model = model_class.from_pretrained(output_dir)
tokenizer = tokenizer_class.from_pretrained(output_dir)

# Copy the model to the GPU.
# model.to(device)

In [40]:
output_dir = './trained_models/3_label_model/'
model = model_class.from_pretrained(output_dir)
tokenizer = tokenizer_class.from_pretrained(output_dir)

In [65]:
output_dir = './trained_models/4_label_model/'
model = model_class.from_pretrained(output_dir)
tokenizer = tokenizer_class.from_pretrained(output_dir)

**Predict with model**

In [6]:
sentences = ['Patient has [entity] fever [entity].', 'Patient denies [entity] fever [entity].']

In [72]:
sentences = ['Patient has fever.', 'Patient denies fever.']

In [68]:
sentences = ['There was an initial murmur on admission likely secondary to [entity] severe anemia [entity] which has since resolved .']

In [66]:
sentences = ['He denies a history of [entity] aspiration events [entity] or choking with food intake .']

In [6]:
sentences = ['She had [entity] a functioning arteriovenous fistula [entity] with a thrill and a bruit in her left arm .', #present
    'This could be due to internal hernia or could be [entity] stricture [entity] related .', #possible
    'Her heart is regular without [entity] murmurs [entity] , rubs or gallops , slightly tachycardic .	', #absent
    'Call your doctor immediately if any new symptoms develop including fevers , [entity] rash [entity],'+
    'increase in bloody urine in nephrostomy or urostomy bags , etc .', #hypothetical
    'He has has [entity] stable , mild angina [entity] , subsequently , mostly in the setting of stress .', #conditional
    'She has a son who is [entity] mentally handicapped [entity] .' #associated_with_someone_else
    ]

print(type(sentences))

<class 'list'>


Read input from file 

In [6]:
xls = pd.ExcelFile('../Data/Test_ast_model_3_label.xlsx', engine='openpyxl')
df_test_present = pd.read_excel(xls, 'present')
df_test_absent = pd.read_excel(xls, 'absent')
df_test_possible = pd.read_excel(xls, 'possible')
df_test_conditional = pd.read_excel(xls, 'conditional')
df_test_hyphothetical = pd.read_excel(xls, 'hyphothetical')
df_test_associated = pd.read_excel(xls, 'associated with someone else')

present_list = df_test_present['Sentence'].tolist()
absent_list = df_test_absent['Sentence'].tolist()
possible_list = df_test_possible['Sentence'].tolist()
conditional_list = df_test_conditional['Sentence'].tolist()
hyphothetical_list = df_test_hyphothetical['Sentence'].tolist()
associated_list = df_test_associated['Sentence'].tolist()

sentences = possible_list


In [9]:
xls = pd.ExcelFile('../Data/Test_ast_model_4_label.xlsx', engine='openpyxl')
df_test_present = pd.read_excel(xls, 'present')
df_test_absent = pd.read_excel(xls, 'absent')
df_test_possible = pd.read_excel(xls, 'possible')
df_test_conditional = pd.read_excel(xls, 'conditional')
df_test_hyphothetical = pd.read_excel(xls, 'hyphothetical')
df_test_associated = pd.read_excel(xls, 'associated with someone else')

present_list = df_test_present['Sentence'].tolist()
absent_list = df_test_absent['Sentence'].tolist()
possible_list = df_test_possible['Sentence'].tolist()
conditional_list = df_test_conditional['Sentence'].tolist()
hyphothetical_list = df_test_hyphothetical['Sentence'].tolist()
associated_list = df_test_associated['Sentence'].tolist()

sentences = associated_list

In [73]:
input_ids = []
attention_masks = []

# For every sentence...
for sent in sentences:
    encoded_dict = tokenizer.encode_plus(
                        sent,                      # Sentence to encode.
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = 128,           # Pad & truncate all sentences.
                        pad_to_max_length = True,
                        return_attention_mask = True,   # Construct attn. masks.
                        return_tensors = 'pt',     # Return pytorch tensors.
                   )
      
    input_ids.append(encoded_dict['input_ids'])

    attention_masks.append(encoded_dict['attention_mask'])

# Convert the lists into tensors.
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
# labels = torch.tensor(labels)


In [74]:
input_ids = torch.tensor(input_ids)
attention_masks = torch.tensor(attention_masks)

  input_ids = torch.tensor(input_ids)
  attention_masks = torch.tensor(attention_masks)


In [75]:
predictions = []

In [76]:
model.eval()

with torch.no_grad():
    result = model(input_ids, token_type_ids=None, attention_mask=attention_masks, return_dict=True)

logits = result.logits
logits = logits.detach().cpu().numpy()
predictions.append(logits)

# print('sentences: ', sentences)
pred_labels_i = np.argmax(logits, axis=1).flatten()
# print('Label prediction: ', pred_labels_i) 


In [None]:
for index, sentence in enumerate(sentences):
  print(sentence)
  print(pred_labels_i[index])
  if pred_labels_i[index] == 0:
    print ('Present')
  elif pred_labels_i[index] == 1:
    print ('Not-present')


In [None]:
file = open('Test_output.txt', 'w')
for index, sentence in enumerate(sentences):
  print(sentence)
  file.write(sentence)
  file.write('\n')
  if pred_labels_i[index] == 0:
    print ('Present')
    file.write('Present')
  elif pred_labels_i[index] == 1:
    print ('Possible')
    file.write('Possible')
  elif pred_labels_i[index] == 2:
    print ('Not-present')
    file.write('Not-present')
  file.write('\n')
  
file.close()

In [77]:

for index, sentence in enumerate(sentences):
  print(sentence)
  if pred_labels_i[index] == 0:
    print ('Present')
  elif pred_labels_i[index] == 1:
    print ('Possible')
  elif pred_labels_i[index] == 2:
    print ('Conditional')
  elif pred_labels_i[index] == 3:
    print ('Not-present')
  


Patient has fever.
Present
Patient denies fever.
Not-present
