In [4]:
import math
import pandas as pd
import numpy as np
import os

from scipy.special import softmax
from seqeval.metrics import classification_report, accuracy_score, f1_score

import torch
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from transformers import BertForTokenClassification, BertTokenizer

import processor
from processor import *

In [2]:
num_labels = len(tag2idx)

In [3]:
# Load model 
save_model_address = './trained_models/C-Bert-test'
model = BertForTokenClassification.from_pretrained(save_model_address, num_labels=num_labels)
tokenizer = BertTokenizer.from_pretrained(save_model_address, do_lower_case=False)

In [5]:
clinical_text = [['He',
 'was',
 'admitted',
 ',',
 'taken',
 'to',
 'the',
 'operating',
 'room',
 'where',
 'he',
 'underwent',
 'L5-S1',
 'right',
 'hemilaminectomy',
 'and',
 'discectomy',
 '.']
, 
['Over',
 'the',
 'next',
 'three',
 'days',
 'he',
 'increased',
 'his',
 'activity',
 'gradually',
 ',',
 'was',
 'able',
 'to',
 'do',
 'stairs',
 'with',
 'Physical',
 'Therapy',
 'and',
 'had',
 'pain',
 'which',
 'could',
 'be',
 'controlled',
 'with',
 'oral',
 'analgesics',
 '.'],

['However', ',', 'her', 'creatinine', 'continued', 'to', 'increase', '.']
]

In [6]:
clinical_tags = [['O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'B-treatment',
 'I-treatment',
 'I-treatment',
 'O',
 'B-treatment',
 'O'],
                 
 ['O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'B-treatment',
 'I-treatment',
 'O',
 'O',
 'B-problem',
 'O',
 'O',
 'O',
 'O',
 'O',
 'B-treatment',
 'I-treatment',
 'O'],
 
 ['O', 'O', 'B-test', 'I-test', 'O', 'O', 'O', 'O']                
]


In [51]:
query_input_ids, query_input_tags, query_attention_masks = process_data(clinical_text, clinical_tags, tokenizer)

In [52]:
query_inputs = torch.tensor(query_input_ids)
query_tags = torch.tensor(query_input_tags)
query_masks = torch.tensor(query_attention_masks)


In [53]:
query_masks.shape

torch.Size([3, 64])

In [54]:
def model_evaluation(input_ids, label_ids, input_mask):
    y_true = []
    y_pred = []
    model.eval()
    with torch.no_grad():
        outputs = model(input_ids, token_type_ids=None,
        attention_mask=input_mask)
        # For eval mode, the first result of outputs is logits
        logits = outputs[0] 
    # Get NER predict result
    logits = torch.argmax(F.log_softmax(logits, dim=2), dim=2)
    logits = logits.detach().cpu().numpy()
    # Get NER true result
    label_ids = label_ids.to('cpu').numpy()
    # Only predict the real word, mark=0, will not calculate
    input_mask = input_mask.to('cpu').numpy()
    # Compare the valuable predict result
    for i, mask in enumerate(input_mask):
        # Real one
        temp_1 = []
        # Predict one
        temp_2 = []
        for j, m in enumerate(mask):
            # Mark=0, meaning its a pad word, dont compare
            if m:
                if tag2name[label_ids[i][j]] != "X" and tag2name[label_ids[i][j]] != "[CLS]" and tag2name[label_ids[i][j]] != "[SEP]" : # Exclude the X label
                    # print(tag2name[logits[i][j]])
                    temp_1.append(tag2name[label_ids[i][j]])
                    temp_2.append(tag2name[logits[i][j]])
            else:
                break
        y_true.append(temp_1)
        y_pred.append(temp_2)

    return y_true, y_pred


In [55]:
y_true, y_pred = model_evaluation(query_inputs, query_tags, query_masks)
    

In [57]:
report = classification_report(y_true, y_pred, digits=4)


In [60]:
# Get acc , recall, F1 result report
report = classification_report(y_true, y_pred, digits=4)

# Save the report into file
output_eval_file = "eval_results.txt"

with open(output_eval_file, "w") as writer:
    print("***** Eval results *****")
    print("\n%s"%(report))
    print("f1 socre: %f"%(f1_score(y_true, y_pred)))
    print("Accuracy score: %f"%(accuracy_score(y_true, y_pred)))
    
    writer.write("f1 socre:\n")
    writer.write(str(f1_score(y_true, y_pred)))
    writer.write("\n\nAccuracy score:\n")
    writer.write(str(accuracy_score(y_true, y_pred)))
    writer.write("\n\n")  
    writer.write(report)

***** Eval results *****

              precision    recall  f1-score   support

     problem     1.0000    1.0000    1.0000         1
        test     1.0000    1.0000    1.0000         1
   treatment     1.0000    1.0000    1.0000         4

   micro avg     1.0000    1.0000    1.0000         6
   macro avg     1.0000    1.0000    1.0000         6
weighted avg     1.0000    1.0000    1.0000         6

f1 socre: 1.000000
Accuracy score: 1.000000


In [63]:
report.split('\n')

['              precision    recall  f1-score   support',
 '',
 '     problem     1.0000    1.0000    1.0000         1',
 '        test     1.0000    1.0000    1.0000         1',
 '   treatment     1.0000    1.0000    1.0000         4',
 '',
 '   micro avg     1.0000    1.0000    1.0000         6',
 '   macro avg     1.0000    1.0000    1.0000         6',
 'weighted avg     1.0000    1.0000    1.0000         6',
 '']

In [9]:
model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=len(tag2idx))
tokenizer = BertTokenizer.from_pretrained(bert_out_address, do_lower_case=False)


In [13]:
test_query = ' '.join(test_query)


In [14]:
test_query

'Over the next three days he increased his activity gradually , was able to do stairs with Physical Therapy and had pain which could be controlled with oral analgesics .'

In [15]:
tokenized_texts = []
temp_token = []
temp_token.append('[CLS]')
token_list = tokenizer.tokenize(test_query)
temp_token.extend(token_list)
temp_token = temp_token[:max_len-1]
temp_token.append('[SEP]')
input_id = tokenizer.convert_tokens_to_ids(temp_token)
padding_len = max_len - len(input_id)
input_id = input_id + ([0] * padding_len)
tokenized_texts = []
tokenized_texts.append(input_id)
attention_masks = [[int(i>0) for i in input_id]]

tokenized_texts = torch.tensor(tokenized_texts)
attention_masks = torch.tensor(attention_masks)

In [16]:
# Set save model to Evalue loop
model.eval()
# Get model predict result
with torch.no_grad():
        outputs = model(tokenized_texts, token_type_ids=None,
        attention_mask=None,)
        # For eval mode, the first result of outputs is logits
        logits = outputs[0] 

In [17]:
predict_results = logits.detach().cpu().numpy()
result_arrays_soft = softmax(predict_results[0])
result_list = np.argmax(result_arrays_soft,axis=-1)

In [18]:
result = [tag2name[t] for t in result_list]
pretok_sent = ""
pretags = ""
for i, tok in enumerate(temp_token):
     if tok.startswith("##"):
         pretok_sent += tok[2:]
     else:
         pretok_sent += " " + tok
         pretags += " " + result[i]
pretok_sent = pretok_sent[1:]
pretags = pretags[1:]

s = pretok_sent.split()
t = pretags.split()

In [19]:
for i, mark in enumerate(attention_masks[0]):
    if mark>0:
        print("Token:%s"%(temp_token[i]))
#         print("Tag:%s"%(result_list[i]))
        print("Predict_Tag:%s"%(tag2name[result_list[i]]))
        #print("Posibility:%f"%(result_array[i][result_list[i]]))
        print()
        

Token:[CLS]
Predict_Tag:[CLS]

Token:Over
Predict_Tag:O

Token:the
Predict_Tag:O

Token:next
Predict_Tag:O

Token:three
Predict_Tag:O

Token:days
Predict_Tag:O

Token:he
Predict_Tag:O

Token:increased
Predict_Tag:O

Token:his
Predict_Tag:O

Token:activity
Predict_Tag:O

Token:gradually
Predict_Tag:O

Token:,
Predict_Tag:O

Token:was
Predict_Tag:O

Token:able
Predict_Tag:O

Token:to
Predict_Tag:O

Token:do
Predict_Tag:O

Token:stairs
Predict_Tag:O

Token:with
Predict_Tag:O

Token:Physical
Predict_Tag:B-treatment

Token:Therapy
Predict_Tag:I-treatment

Token:and
Predict_Tag:O

Token:had
Predict_Tag:O

Token:pain
Predict_Tag:B-problem

Token:which
Predict_Tag:O

Token:could
Predict_Tag:O

Token:be
Predict_Tag:O

Token:controlled
Predict_Tag:O

Token:with
Predict_Tag:O

Token:oral
Predict_Tag:B-treatment

Token:anal
Predict_Tag:I-treatment

Token:##ges
Predict_Tag:X

Token:##ics
Predict_Tag:X

Token:.
Predict_Tag:O

Token:[SEP]
Predict_Tag:[SEP]



In [24]:
text = ' '.join(temp_token)
re = ' '.join(result)

In [31]:
import docx
from docx.enum.text import WD_COLOR_INDEX
# Create an instance of a word document
doc = docx.Document()

# Add a Title to the document 
doc.add_heading('Results', 0)

# Creating paragraph with some content
para = doc.add_paragraph(''' ''')
  
flag_treatment, flag_problem, flag_test = 0, 0, 0 
for i in range(1, len(t)-1):
    if t[i] == 'B-treatment':
        flag_treatment = 1
        para.add_run(s[i]+' ').font.highlight_color = WD_COLOR_INDEX.RED
        # print(print_treatment(s[i]), end=' ')
    elif (t[i] == 'I-treatment' or t[i] == 'X') and flag_treatment == 1 :
        para.add_run(s[i]+' ').font.highlight_color = WD_COLOR_INDEX.RED
    elif t[i] == 'B-test':
        flag_test = 1
        para.add_run(s[i]+' ').font.highlight_color = WD_COLOR_INDEX.PINK
    elif (t[i] == 'I-test' or t[i] == 'X') and flag_test == 1 :
        para.add_run(s[i]+' ').font.highlight_color = WD_COLOR_INDEX.PINK
    elif t[i] == 'B-problem':
        flag_problem = 1
        para.add_run(s[i]+' ').font.highlight_color = WD_COLOR_INDEX.TURQUOISE
    elif (t[i] == 'I-problem' or t[i] == 'X') and flag_problem == 1 :
        para.add_run(s[i]+' ').font.highlight_color = WD_COLOR_INDEX.TURQUOISE    
    elif t[i] == 'O':
        flag_treatment, flag_problem, flag_test = 0, 0, 0 
        para.add_run(s[i]+' ').font.highlight_color = WD_COLOR_INDEX.AUTO
        

# # Adding more content to paragraph and highlighting them
# para.add_run(''' It contains well written, well thought and well-explained '''
#             ).font.highlight_color = WD_COLOR_INDEX.YELLOW
  
# # Adding more content to paragraph
# para.add_run('''computer science and programming articles, quizzes etc.''')
  
# Now save the document to a location 
doc.save('result.docx')


In [134]:
from termcolor import colored
def print_treatment(word):
    return colored(word,'white','on_red', attrs=['underline'])
def print_test(word):
    return colored(word, 'white','on_magenta')
def print_problem(word):
    return colored(word, 'magenta','on_cyan', attrs=['reverse', 'bold'])


In [135]:
print(print_problem('problem'))
print(print_treatment('treatment'))
print(print_test('test'))

[1m[7m[46m[35mproblem[0m
[4m[41m[37mtreatment[0m
[45m[37mtest[0m


In [137]:
from termcolor import colored
location = {}
flag_treatment, flag_problem, flag_test = 0, 0, 0 
for i in range(1, len(t)-1):
    if t[i] == 'B-treatment':
        flag_treatment = 1
        print(print_treatment(s[i]), end=' ')
    elif (t[i] == 'I-treatment' or t[i] == 'X') and flag_treatment == 1 :
        print(print_treatment(s[i]), end=' ')
    elif t[i] == 'B-test':
        flag_test = 1
        print(print_test(s[i]), end=' ')
    elif (t[i] == 'I-test' or t[i] == 'X') and flag_test == 1 :
        print(print_test(s[i]), end=' ')
    elif t[i] == 'B-problem':
        flag_problem = 1
        print(print_problem(s[i]), end=' ')
    elif (t[i] == 'I-problem' or t[i] == 'X') and flag_problem == 1 :
        print(print_problem(s[i]), end=' ')    
    elif t[i] == 'O':
        flag_treatment, flag_problem, flag_test = 0, 0, 0 
        print(s[i], end=' ')

Over the next three days he increased his activity gradually , was able to do stairs with [4m[41m[37mPhysical[0m [4m[41m[37mTherapy[0m and had [1m[7m[46m[35mpain[0m which could be controlled with [4m[41m[37moral[0m [4m[41m[37manalgesics[0m . 

In [None]:
temp_lable = []
temp_token = []

# Add [CLS] at the front 
temp_lable.append('[CLS]')
temp_token.append('[CLS]')

# Tokenize words
# [lidocaine patch] -> lid ##oc ##aine patch
# [B-treatment I-treatment] -> B-treatment X X I-treatment
for word, lab in zip(word_list, label):
    token_list = tokenizer.tokenize(word)
    for m, token in enumerate(token_list):
        temp_token.append(token)
        if m == 0:
            temp_lable.append(lab)
        else:
            temp_lable.append('X')  
            
# Add [SEP] at the end
temp_lable.append('[SEP]')
temp_token.append('[SEP]')

In [5]:
class SentenceGetter(object) :
    """
    input a dataframe
    
    
    Generate sets of words and tags.
    self.sentence:
    Each sentence is a list
    [('Supraventricular', 'B-problem'),
    ('tachycardia', 'I-problem'),
    ('(', 'O'),
    ('on', 'O'),
    ('a', 'B-treatment'),
    ('beta', 'I-treatment'),
    ('blocker', 'I-treatment'),
    (')', 'O')]
    
    """
    def __init__(self, data):
        self.n_sent = 1
        self.data = data
        self.empty = False
        agg_func = lambda s: [(w, t) for w, t in zip(s["word"].values.tolist(),
                                                        #    s["POS"].values.tolist(),
                                                           s["tag"].values.tolist())]
        self.grouped = self.data.groupby("sentence #").apply(agg_func)
        self.sentences = [s for s in self.grouped]
    
    def get_next(self):
        try:
            s = self.grouped["sentence: {}".format(self.n_sent)]
            self.n_sent += 1
            return s
        except:
            return None
    
    def get_examples(self):
        return random.sample(self.sentences, 10)
    
    def get_sentences(self):
        return[[s[0] for s in sent] for sent in self.sentences]
        
    def get_labels(self):
        return [[s[1] for s in sent] for sent in self.sentences]

In [6]:
class InputGenerater(object):
    def __init__(self, sentences, labels) -> None:
        self.tokenized_texts = []
        self.word_piece_labels = []
        i_inc = 0
        for word_list,label in (zip(sentences,labels)):
            temp_lable = []
            temp_token = []
            
            # Add [CLS] at the front 
            temp_lable.append('[CLS]')
            temp_token.append('[CLS]')
            
            for word,lab in zip(word_list,label):
                token_list = tokenizer.tokenize(word)
                for m,token in enumerate(token_list):
                    temp_token.append(token)
                    if m==0:
                        temp_lable.append(lab)
                    else:
                        temp_lable.append('X')  
                        
            # Add [SEP] at the end
            temp_lable.append('[SEP]')
            temp_token.append('[SEP]')
            
            self.tokenized_texts.append(temp_token)
            self.word_piece_labels.append(temp_lable)
            
            if 5 > i_inc:
                print("No.%d,len:%d"%(i_inc,len(temp_token)))
                print("texts:%s"%(" ".join(temp_token)))
                print("No.%d,len:%d"%(i_inc,len(temp_lable)))
                print("lables:%s"%(" ".join(temp_lable)))
            i_inc +=1
            
        self.input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in self.tokenized_texts],
                    maxlen=config.MAX_LEN, dtype="long", truncating="post", padding="post")
            
    def get_input_ids(self):    
        return self.input_ids
    
    def get_tags(self):
        tags = pad_sequences([[get_tag2idx().get(l) for l in lab] for lab in self.word_piece_labels],
                        maxlen=config.MAX_LEN, value=get_tag2idx()["O"], padding="post",
                        dtype="long", truncating="post")
        return tags
    def get_attention_masks(self):
        attention_masks = [[int(i>0) for i in ii] for ii in self.input_ids]
        return attention_masks
    def get_segment_ids(self):
        segment_ids = [[0] * len(input_id) for input_id in self.input_ids]
        return segment_ids


def convert_to_tensor(*inputs, drop_last=False):
    data = TensorDataset(*tuple(torch.tensor(inputs)))
    data_sampler = RandomSampler(data)
    # Drop last can make batch training better for the last one
    dataloader = DataLoader(data, sampler=data_sampler, batch_size=config.BATCH_NUM, drop_last=drop_last)
    return dataloader

In [10]:
df_test = pd.read_csv(config.data_path_test, sep="\t").astype(str)
sg = SentenceGetter(df_test)
sentences = sg.get_sentences()
tags = sg.get_labels()
test_sets = InputGenerater(sentences=sentences, labels=tags)

test_inputs = test_sets.get_input_ids()
test_tags = test_sets.get_tags()
test_attetion_masks = test_sets.get_attention_masks()

test_dataloader = utils.convert_to_tensor(test_inputs, test_attetion_masks, test_tags)


No.0,len:5
texts:[CLS] admission date : [SEP]
No.0,len:5
lables:[CLS] O O O [SEP]
No.1,len:7
texts:[CLS] 2014 - 12 - 29 [SEP]
No.1,len:7
lables:[CLS] O X X X X [SEP]
No.2,len:6
texts:[CLS] all ##er ##gies : [SEP]
No.2,len:6
lables:[CLS] O X X O [SEP]
No.3,len:4
texts:[CLS] 17 units [SEP]
No.3,len:4
lables:[CLS] O O [SEP]
No.4,len:22
texts:[CLS] includes a history of at ##rial fi ##bri ##lla ##tion with good heart rate control on dig ##ox ##in . [SEP]
No.4,len:22
lables:[CLS] O O O O B-problem X I-problem X X X O O B-treatment I-treatment I-treatment O B-treatment X X O [SEP]


NameError: name 'pad_sequences' is not defined

In [40]:
import glob

In [43]:
f = glob.glob('processed/test/*')

In [45]:
os.listdir(config.INDIVIDUAL_TEST)

['0289.tsv',
 '0090.tsv',
 '0246.tsv',
 '0390.tsv',
 '0365.tsv',
 '0174.tsv',
 '0282.tsv',
 '0305.tsv',
 '0150.tsv',
 '0101.tsv',
 '0086.tsv',
 '0357.tsv',
 '0230.tsv',
 '0294.tsv',
 '0377.tsv',
 '0266.tsv',
 '0050.tsv',
 '0026.tsv',
 '0049.tsv',
 '0029.tsv',
 '0005.tsv',
 '0081.tsv',
 '0466.tsv',
 '0245.tsv',
 '0378.tsv',
 '0463.tsv',
 '0445.tsv',
 '0053.tsv',
 '0477.tsv',
 '0461.tsv',
 '0362.tsv',
 '0473.tsv',
 '0309.tsv',
 '0415.tsv',
 '0222.tsv',
 '0329.tsv',
 '0474.tsv',
 '0393.tsv',
 '0345.tsv',
 '0109.tsv',
 '0129.tsv',
 '0454.tsv',
 '0366.tsv',
 '0082.tsv',
 '0439.tsv',
 '0173.tsv',
 '0094.tsv',
 '0025.tsv',
 '0270.tsv',
 '0133.tsv',
 '0261.tsv',
 '0153.tsv',
 '0237.tsv',
 '0074.tsv',
 '0046.tsv',
 '0066.tsv',
 '0037.tsv',
 '0338.tsv',
 '0285.tsv',
 '0425.tsv',
 '0322.tsv',
 '0385.tsv',
 '0446.tsv',
 '0141.tsv',
 '0138.tsv',
 '0121.tsv',
 '0421.tsv',
 '0389.tsv',
 '0186.tsv',
 '0233.tsv',
 '0134.tsv',
 '0054.tsv',
 '0013.tsv',
 '0190.tsv',
 '0185.tsv',
 '0409.tsv',
 '0205.tsv',

In [61]:
f = open("eval_results.txt", "r")
print(f.read())

f1 socre:
1.0

Accuracy score:
1.0

              precision    recall  f1-score   support

     problem     1.0000    1.0000    1.0000         1
        test     1.0000    1.0000    1.0000         1
   treatment     1.0000    1.0000    1.0000         4

   micro avg     1.0000    1.0000    1.0000         6
   macro avg     1.0000    1.0000    1.0000         6
weighted avg     1.0000    1.0000    1.0000         6

