In [1]:
import os
import random
import copy
import sys
from seqeval.metrics import classification_report, accuracy_score, f1_score
import stanza
#import json
from transformers import BertForTokenClassification, BertTokenizer
from transformers import AutoConfig, AutoTokenizer, BertForSequenceClassification
import torch
from scipy.special import softmax
import numpy as np

In [2]:
try:
    nlp = stanza.Pipeline(lang="en", processors="tokenize")
except Exception:
    stanza.download("en")
    nlp = stanza.Pipeline(lang="en", processors="tokenize")


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.0.json:   0%|   …

2022-08-06 01:11:20 INFO: Loading these models for language: en (English):
| Processor | Package  |
------------------------
| tokenize  | combined |

2022-08-06 01:11:20 INFO: Use device: cpu
2022-08-06 01:11:20 INFO: Loading: tokenize
2022-08-06 01:11:21 INFO: Done loading processors!


In [3]:
tag2idx={'B-problem': 0,
 'B-test': 1,
 'B-treatment': 2,
 'I-problem': 3,
 'I-test': 4,
 'I-treatment': 5,
 'O': 6,
 'X': 7,
 '[CLS]': 8,
 '[SEP]': 9
 }
# Mapping index to name
tag2name = {tag2idx[key]: key for key in tag2idx}

In [4]:
def load_ner_model():
    num_labels = len(tag2idx)
    save_model_address = './trained_models/NER/C-Bert-test'
    model = BertForTokenClassification.from_pretrained(
        save_model_address, num_labels=num_labels)
    tokenizer = BertTokenizer.from_pretrained(
        save_model_address, do_lower_case=False)
    return model, tokenizer


def load_assertion_model():
    # no of classifier: present, not-present
    num_labels = 6
    MODEL_CLASSES = {
        'bert': (AutoConfig, BertForSequenceClassification, AutoTokenizer),
    }
    MODEL_ADDRESS = 'emilyalsentzer/Bio_ClinicalBERT'
    config_class, model_class, tokenizer_class = MODEL_CLASSES['bert']
    model_config = config_class.from_pretrained(
        MODEL_ADDRESS, num_labels=num_labels)
    tokenizer = tokenizer_class.from_pretrained(
        MODEL_ADDRESS, do_lower_case=False)
    model = model_class.from_pretrained(MODEL_ADDRESS, config=model_config)
    output_dir = './trained_models/Assertion/6_label_model_oversampling'
    model = model_class.from_pretrained(output_dir)
    tokenizer = tokenizer_class.from_pretrained(output_dir)
    return model, tokenizer

In [5]:
model_ner, tokenizer_ner = load_ner_model()
model_assertion, tokenizer_assertion = load_assertion_model()

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model

In [6]:
MAX_LEN = 128

In [7]:
def create_query(sentence, tokenizer):

    temp_token = ['[CLS]']
    # word_list = [token.text for token in sentence.tokens]
    for word in sentence:
        temp_token.extend(tokenizer.tokenize(word))
    temp_token = temp_token[:128 - 1]
    temp_token.append('[SEP]')
    input_id = tokenizer.convert_tokens_to_ids(temp_token)
    padding_len = MAX_LEN - len(input_id)
    input_id = input_id + ([0] * padding_len)
    tokenized_texts = [input_id]
    attention_masks = [[int(i>0) for i in input_id]]

    return temp_token, torch.tensor(tokenized_texts), torch.tensor(attention_masks)

In [8]:
def model_inference(model, input_ids):
    #model.to(device)
    #input_ids = input_ids.to(device)
    model.eval()
    with torch.no_grad():
        outputs = model(input_ids, token_type_ids=None,
                        attention_mask=None)
        # For eval mode, the first result of outputs is logits
        logits = outputs[0]
    # Get NER predict result
    predict_results = logits.detach().cpu().numpy()
    result_arrays_soft = softmax(predict_results[0])

    return np.argmax(result_arrays_soft, axis=-1)

In [9]:
def predict_entities(long_text):
    all_sentences = []
    all_tags = []
    doc = nlp(long_text)
    for i, sentence in enumerate(doc.sentences):
        # temp_token: tokenized words
        # input_ids: convert temp_token to id
        word_list = [token.text for token in sentence.tokens]
        temp_token, input_ids, attention_masks = create_query(word_list, tokenizer_ner)
        result_list = model_inference(model_ner, input_ids)
        result = [tag2name[t] for t in result_list]
        pretok_sent = ""
        pretags = ""
        for i, tok in enumerate(temp_token):
            if tok.startswith("##"):
                pretok_sent += tok[2:]
            else:
                pretok_sent += f" {tok}"
                pretags += f" {result[i]}"
        pretok_sent = pretok_sent[1:]
        pretags = pretags[1:]
        s = pretok_sent.split()
        t = pretags.split()
        all_sentences.append(s)
        all_tags.append(t)
    return all_sentences, all_tags

In [10]:
#NER_input_text = "He had no cardiac murmur ."

In [11]:
#all_sentences, all_tags = predict_entities (NER_input_text)

#print (all_sentences)
#print (all_tags)

In [12]:
# extract index of entity
# extract sentence with assertion
def entity_extractor(all_sentences, all_tags):
    sentences_with_problem = []
    all_problems_in_text_tmp = []
    all_treatment_in_text = []
    all_test_in_text = []

    for s, t in zip(all_sentences, all_tags):
        flag_treatment, flag_problem, flag_test = 0, 0, 0
        problem_in_sentence = ''
        treatment_in_sentence = []
        test_in_sentence = []
        for i in range(1, len(t)-1):
            if t[i] == 'B-problem':
                flag_problem = 1
                # if there is entities, add the index of sentence to a list
                # sentences_with_problem.append(n)
                # append the index of entity to a list
                if problem_in_sentence:
                    problem_in_sentence = f'{problem_in_sentence}| {str(i)}'
                else:
                    problem_in_sentence += str(i)
            elif t[i] == 'I-problem' or t[i] == 'X' and flag_problem == 1:
                problem_in_sentence = f'{problem_in_sentence} {str(i)}'
            elif t[i] == 'B-test':
                flag_test = 1
                test_in_sentence.append(i)
            elif t[i] == 'I-test' or t[i] == 'X' and flag_test == 1:
                test_in_sentence.append(i)
            elif t[i] == 'B-treatment':
                flag_treatment = 1
                treatment_in_sentence.append(i)
            elif t[i] == 'I-treatment' or t[i] == 'X' and flag_treatment == 1:
                treatment_in_sentence.append(i)
            elif t[i] in ['O', 'X']:
                flag_treatment, flag_problem, flag_test = 0, 0, 0
                # print(s[i], end=' ')
        all_problems_in_text_tmp.append(problem_in_sentence)
        all_treatment_in_text.append(treatment_in_sentence)
        all_test_in_text.append(test_in_sentence)

    # create sentences with '[entity]' tag
    all_problems_in_text = []
    sentences_with_problem = []
    for sentence, problem_index in zip(all_sentences, all_problems_in_text_tmp):
        # print(problem_index)
        if problem_index:
            index = problem_index.split('|')
            tmp = [i.split() for i in index]
            all_problems_in_text.append(tmp)
            for i_list in tmp:
                s = copy.deepcopy(sentence)
                s.insert(int(i_list[-1])+1, '[entity]')
                s.insert(int(i_list[0]), '[entity]')
                s = ' '.join(s)
                sentences_with_problem.append(s)
        else:
            # sentences_with_problem.append(sentence)
            all_problems_in_text.append(problem_index)

    return sentences_with_problem, all_problems_in_text, all_treatment_in_text, all_test_in_text


In [13]:
def assertion_input_creator(sentences, tokenizer, add_special_tokens=True):
    input_ids = []
    attention_mask = []
    # For every sentence...
    for sent in sentences:
        encoded_dict = tokenizer.encode_plus(
            sent,                      # Sentence to encode.
            add_special_tokens=add_special_tokens,  # Add '[CLS]' and '[SEP]'
            max_length=128,           # Pad & truncate all sentences.
            pad_to_max_length=True,
            return_attention_mask=True,   # Construct attn. masks.
            return_tensors='pt',     # Return pytorch tensors.
        )
        input_ids.append(encoded_dict['input_ids'])
        attention_mask.append(encoded_dict['attention_mask'])

    # Convert the lists into tensors.
    input_ids = torch.cat(input_ids, dim=0)
    attention_mask = torch.cat(attention_mask, dim=0)
    return torch.tensor(input_ids), torch.tensor(attention_mask)


def assertion_model_inference(model, input_ids, attention_mask):
    #model.to(device)
    #input_ids = input_ids.to(device)
    #attention_mask = attention_mask.to(device)

    predictions, true_labels = [], []
    model.eval()
    with torch.no_grad():
        result = model(input_ids, token_type_ids=None,
                       attention_mask=attention_mask, return_dict=True)

    logits = result.logits
    logits = logits.detach().cpu().numpy()
    predictions.append(logits)

    pred_labels_i = np.argmax(logits, axis=1).flatten()

    def index2label(x):
        if x == 0:
            return 'Present'
        elif x == 1:
            return 'Possible'
        elif x == 2:
            return 'Conditional'
        elif x == 3:
            return 'Associated with someone else'
        elif x == 4:
            return 'Hypothetical'
        elif x == 5:
            return 'Absent'

    pred_labels = map(index2label, pred_labels_i)
    return list(pred_labels)

In [14]:
def predict_assertion(all_sentences, all_tags):

    # extract index of entity
    # extract sentence with assertion

    (
        sentences_with_problem,
        all_problems_in_text,
        all_treatment_in_text,
        all_test_in_text,
    ) = entity_extractor(all_sentences, all_tags)
    input_ids, attention_mask = assertion_input_creator(
        sentences_with_problem, tokenizer_assertion, add_special_tokens=False
    )
    pred_labels = assertion_model_inference(model_assertion, input_ids, attention_mask)
    # map labels wordwisely.
    """
    e.g. problem_list = [[[4, 5, 6], [10]], '', [[2, 3], [6,7]]]
        label = ['yes', 'no', 'what', 'yes']
    resutls: [['yes', 'yes', 'yes', 'no'], '', ['what', 'what', 'yes', 'yes']]
    """
    i_label = 0
    labels_in_sentence = []
    for index in all_problems_in_text:
        if index:
            tmp = []
            for i_p in index:
                tmp.extend([pred_labels[i_label]] * len(i_p))
                i_label += 1
            labels_in_sentence.append(tmp)
        else:
            labels_in_sentence.append(index)

    all_problems_in_text_flatten = list(
        map(lambda l: [int(item) for elem in l for item in elem], all_problems_in_text)
    )
    return (
        labels_in_sentence,
        all_problems_in_text_flatten,
        all_treatment_in_text,
        all_test_in_text,
    )

In [15]:
# This method add start-tag if there is no Begin (B) before Inside (I) and Unseen (X). 
# Ast : - Present   Present  - Present   Absent    Absent    -
# Tag : O I-problem I-roblem O I-problem I-problem I-problem O
# output : O <present> B-problem </present> <present> B-problem X I-problem </present> <absent> B-problem I-problem </absent> O 

def add_problem_start_tag(
    output_text_with_classification,
    word,
    i,
    tags,
    assertion_index,
    pred_assertion,
    list_ast_entity,
    assertion,
    tag_string,
):
    if i-1 not in assertion_index:
        # Ast   : -    P   -     P    -
        # Tag   : O <> I I O <> I I I O
        list_ast_entity.append(word)
        output_text_with_classification = (
            output_text_with_classification + tag_string + word
        )
    else: 
        prev_index = assertion_index.index(i)
        if pred_assertion[prev_index] != assertion:
            # Ast   : -     - P     A   -
            # Tag   : O I I O I <>  I I O
            list_ast_entity.append(word)
            output_text_with_classification = (
                output_text_with_classification + tag_string + word
            )
        else :
            list_ast_entity[-1] = (
                list_ast_entity[-1] + " " + word
            )
            output_text_with_classification = (
                output_text_with_classification + " " + word
            )

    return output_text_with_classification, list_ast_entity


# This method add end-tag for all cases. 
# Ast : - Present   Present  Present Present   Absent    Absent    -
# Tag : O B-problem B-roblem X       I-problem B-problem I-problem O
# output : O <present> B-problem </present> <present> B-problem X I-problem </present> <absent> B-problem I-problem </absent> O 

def add_problem_end_tag(
    output_text_with_classification,
    i,
    tags,
    assertion_index,
    pred_assertion,
    assertion,
    tag_string,
):
    if i + 1 in assertion_index:
        next_index = assertion_index.index(i + 1)
        if pred_assertion[next_index] == assertion:
            # Ast :  P     P
            # Tag: O B </> B X I B I O
            if tags[i+1] == "B-problem": 
                output_text_with_classification = (
                    output_text_with_classification + tag_string
                )
        else :
            # Ast :        P     A 
            # Tag: O B B X I </> B I O
            output_text_with_classification = (
                output_text_with_classification + tag_string
            )
            
    else:
        # Ast :             A
        # Tag : O B B X I B I </> O
        output_text_with_classification = output_text_with_classification + tag_string

    return output_text_with_classification

In [16]:
def process_sentence(
    sentence,
    tags,
    pred_assertion,
    assertion_index,
    treatment_index,
    test_index,
    output_text_with_classification,
    list_ast_present_entity,
    list_ast_absent_entity,
    list_ast_posssible_entity,
    list_ast_conditional_entity,
    list_ast_hyphothetical_entity,
    list_ast_associated_entity,
    list_treatment_entity,
    list_test_entity,
):
    #print('sentence ::: ', sentence)
    #print('tags ::: ', tags)
    #print('pred_assertion ::: ', pred_assertion)
    #print('assertion_index ::: ', assertion_index)
    #print('treatment_index ::: ', treatment_index)
    #print('test_index ::: ', test_index)

    for i, word in enumerate(sentence):

        if i in assertion_index:
            index = assertion_index.index(i)
            if tags[i] == "B-problem":
                if pred_assertion[index] == "Present":
                    list_ast_present_entity.append(word)
                    output_text_with_classification = (
                        output_text_with_classification + " <Problem-present> " + word
                    )
                    # For one word problem-entity, there is only 'beginning' tag no 'inside' tag
                    # tags : 'O', 'O', 'B-problem', 'O', 'O',
                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Present",
                        " </Problem-present> ",
                    )

                elif pred_assertion[index] == "Possible":
                    list_ast_posssible_entity.append(word)
                    output_text_with_classification = (
                        output_text_with_classification + " <Problem-possible> " + word
                    )
                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Possible",
                        " </Problem-possible> ",
                    )

                elif pred_assertion[index] == "Conditional":
                    list_ast_conditional_entity.append(word)
                    output_text_with_classification = (
                        output_text_with_classification
                        + " <Problem-conditional> "
                        + word
                    )
                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Conditional",
                        " </Problem-conditional> ",
                    )

                elif pred_assertion[index] == "Hypothetical":
                    list_ast_hyphothetical_entity.append(word)
                    output_text_with_classification = (
                        output_text_with_classification
                        + " <Problem-hypothetical> "
                        + word
                    )
                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Hypothetical",
                        " </Problem-hypothetical> ",
                    )

                elif pred_assertion[index] == "Associated with someone else":
                    list_ast_associated_entity.append(word)
                    output_text_with_classification = (
                        output_text_with_classification
                        + " <Problem-associated> "
                        + word
                    )
                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Associated with someone else",
                        " </Problem-associated> ",
                    )

                elif pred_assertion[index] == "Absent":
                    list_ast_absent_entity.append(word)
                    output_text_with_classification = (
                        output_text_with_classification + " <Problem-absent> " + word
                    )
                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Absent",
                        " </Problem-absent> ",
                    )

            elif tags[i] == "I-problem" or tags[i] == "X":
                if pred_assertion[index] == "Present":
                    (output_text_with_classification, list_ast_present_entity) = add_problem_start_tag(
                        output_text_with_classification,
                        word,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        list_ast_present_entity,
                        "Present",
                        " <Problem-present> ",
                    )

                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Present",
                        " </Problem-present> ",
                    )

                elif pred_assertion[index] == "Possible":

                    (output_text_with_classification, list_ast_posssible_entity) = add_problem_start_tag(
                        output_text_with_classification,
                        word,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        list_ast_posssible_entity,
                        "Possible",
                        " <Problem-possible> ",
                    )
                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Possible",
                        " </Problem-possible> ",
                    )

                elif pred_assertion[index] == "Conditional":

                    (output_text_with_classification, list_ast_conditional_entity) = add_problem_start_tag(
                        output_text_with_classification,
                        word,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        list_ast_conditional_entity,
                        "Conditional",
                        " <Problem-conditional> ",
                    )
                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Conditional",
                        " </Problem-conditional> ",
                    )

                elif pred_assertion[index] == "Hypothetical":

                    (output_text_with_classification, list_ast_hyphothetical_entity) = add_problem_start_tag(
                        output_text_with_classification,
                        word,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        list_ast_hyphothetical_entity,
                        "Hypothetical",
                        " <Problem-hypothetical> ",
                    )
                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Hypothetical",
                        " </Problem-hypothetical> ",
                    )

                elif pred_assertion[index] == "Associated with someone else":

                    (output_text_with_classification, list_ast_associated_entity) = add_problem_start_tag(
                        output_text_with_classification,
                        word,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        list_ast_associated_entity,
                        "Associated with someone else",
                        " <Problem-associated> ",
                    )
                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Associated with someone else",
                        " </Problem-associated> ",
                    )

                elif pred_assertion[index] == "Absent":

                    (output_text_with_classification, list_ast_absent_entity) = add_problem_start_tag(
                        output_text_with_classification,
                        word,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        list_ast_absent_entity,
                        "Absent",
                        " <Problem-absent> ",
                    )
                    output_text_with_classification = add_problem_end_tag(
                        output_text_with_classification,
                        i,
                        tags,
                        assertion_index,
                        pred_assertion,
                        "Absent",
                        " </Problem-absent> ",
                    )

        elif i in treatment_index:
            if tags[i] == "B-treatment":
                list_treatment_entity.append(word)
                output_text_with_classification = (
                    output_text_with_classification + " <Treatment> " + word
                )
                # For one word Treatment-entity, there is only 'beginning' tag no 'inside' tag
                if tags[i + 1] != "I-treatment" and tags[i + 1] != "X":
                    output_text_with_classification = (
                        output_text_with_classification + " </Treatment> "
                    )
            elif tags[i] == "I-treatment" or tags[i] == "X":
                if i-1 not in treatment_index:
                    # Tag: O I-treatment O I-treatment X 0 
                    list_treatment_entity.append(word)
                    output_text_with_classification = (
                        output_text_with_classification + " <Treatment> " + word
                    )
                else : 
                    list_treatment_entity[-1] = list_treatment_entity[-1] + " " + word
                    output_text_with_classification = (
                        output_text_with_classification + " " + word
                    )

                if tags[i + 1] != "I-treatment" and tags[i + 1] != "X":
                    output_text_with_classification = (
                        output_text_with_classification + " </Treatment> "
                    )
        elif i in test_index:
            if tags[i] == "B-test":
                list_test_entity.append(word)
                output_text_with_classification = (
                    output_text_with_classification + " <Test> " + word
                )
                # For one word Test-entity, there is only 'beginning' tag no 'inside' tag
                if tags[i + 1] != "I-test" and tags[i + 1] != "X":
                    output_text_with_classification = (
                        output_text_with_classification + " </Test> "
                    )
            elif tags[i] == "I-test" or tags[i] == "X":
                if i-1 not in test_index:
                    # Tag: O I-test O I-test X 0 
                    list_test_entity.append(word)
                    output_text_with_classification = (
                        output_text_with_classification + " <Test> " + word
                    )
                else :
                    list_test_entity[-1] = list_test_entity[-1] + " " + word
                    output_text_with_classification = (
                        output_text_with_classification + " " + word
                    )
                if tags[i + 1] != "I-test" and tags[i + 1] != "X":
                    output_text_with_classification = (
                        output_text_with_classification + " </Test> "
                    )
        else:
            if word.strip() != "[SEP]" and word.strip() != "[CLS]":
                output_text_with_classification = (
                    output_text_with_classification + " " + word
                )

    return (
        output_text_with_classification,
        list_ast_present_entity,
        list_ast_absent_entity,
        list_ast_posssible_entity,
        list_ast_conditional_entity,
        list_ast_hyphothetical_entity,
        list_ast_associated_entity,
        list_treatment_entity,
        list_test_entity,
    )

# Insert input text.

If input is long, it may take few seconds/minutes to predict.  

In [19]:
input_text = "Brief Hospital Course : Admitted 05-08 to cardiology service for surgical work-up . INR 2.7 and echo repeated , coumadin held , and heparin drips started . Review of OSH echo revealed peak gradient of 64mm (  not 30 's ) . New echo showed pannus involving the valve and ? thrombus . Seen by Dr. Laura of cardiac surgery and underwent redo AVR with aortic root enlargement on 05-11 . Transferred to the CSRU in stable condition on phenylephrine and propofol drips . Weaned to extubation on POD #2 and off all drips . Episode of AFib that evening treated with amiodarone and converted to SR ."

In [20]:
input_text = "Significant for chronic atrial fibrillation , managed with an Italian variation of Digoxin , and Amioadrone , hypertension managed with an ACE inhibitor , chronic obstructive pulmonary disease managed with a zanthine preparation and low dose steroids , as well as an occasional inhaler ."

In [21]:
all_sentences, all_tags = predict_entities(input_text)

(assertion_in_sentence,
    all_problems_in_text_flatten,
    all_treatment_in_text,
    all_test_in_text,
) = predict_assertion(all_sentences, all_tags)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
  return torch.tensor(input_ids), torch.tensor(attention_mask)


In [23]:
# These lists are to collect problem-entity label-wise and display on the UI Table
list_ast_present_entity = []
list_ast_absent_entity = []
list_ast_posssible_entity = []
list_ast_conditional_entity = []
list_ast_hyphothetical_entity = []
list_ast_associated_entity = []

list_treatment_entity = []
list_test_entity = []

# This string is to format full clinical text and add <tag>, like  <Problem-present> problem-entity </Problem-present>
output_text_with_classification = ""

for (
    sentence,
    tags,
    pred_assertion,
    assertion_index,
    treatment_index,
    test_index,
) in zip(
    all_sentences,
    all_tags,
    assertion_in_sentence,
    all_problems_in_text_flatten,
    all_treatment_in_text,
    all_test_in_text,
):
    (
        output_text_with_classification,
        list_ast_present_entity,
        list_ast_absent_entity,
        list_ast_posssible_entity,
        list_ast_conditional_entity,
        list_ast_hyphothetical_entity,
        list_ast_associated_entity,
        list_treatment_entity,
        list_test_entity,
    ) = process_sentence(
        sentence,
        tags,
        pred_assertion,
        assertion_index,
        treatment_index,
        test_index,
        output_text_with_classification,
        list_ast_present_entity,
        list_ast_absent_entity,
        list_ast_posssible_entity,
        list_ast_conditional_entity,
        list_ast_hyphothetical_entity,
        list_ast_associated_entity,
        list_treatment_entity,
        list_test_entity,
    )


In [24]:
print("output text :: ", output_text_with_classification)

output text ::   Significant for <Problem-present> chronic atrial fibrillation </Problem-present>  , managed with an Italian variation of <Treatment> Digoxin </Treatment>  , and <Treatment> Amioadrone </Treatment>  , <Problem-present> hypertension </Problem-present>  managed with <Treatment> an ACE inhibitor </Treatment>  , <Problem-present> chronic obstructive pulmonary disease </Problem-present>  managed with <Treatment> a zanthine preparation </Treatment>  and <Treatment> low dose steroids </Treatment>  , as well as <Treatment> an </Treatment>  <Problem-present> occasional inhaler </Problem-present>  .


In [25]:
print ('Present entity list : ', list_ast_present_entity)
print ('Absent entity list : ', list_ast_absent_entity)
print ('Possible entity list : ', list_ast_posssible_entity)
print ('Conditional entity list : ', list_ast_conditional_entity)
print ('Hyphothetical entity list : ', list_ast_hyphothetical_entity)
print ('Associated entity list : ', list_ast_associated_entity)
print ('Treatment entity list : ', list_treatment_entity)
print ('Test entity list : ', list_test_entity)


Present entity list :  ['chronic atrial fibrillation', 'hypertension', 'chronic obstructive pulmonary disease', 'occasional inhaler']
Absent entity list :  []
Possible entity list :  []
Conditional entity list :  []
Hyphothetical entity list :  []
Associated entity list :  []
Treatment entity list :  ['Digoxin', 'Amioadrone', 'an ACE inhibitor', 'a zanthine preparation', 'low dose steroids', 'an']
Test entity list :  []
