In [1]:
%reload_ext autoreload
%autoreload
%matplotlib inline
import os, json,glob,sys,io
from types import SimpleNamespace
from experiment import run_model
from eval import calculate_stats, eval_model
import pickle
from datetime import datetime 
import torch
from torch import nn
from transformers import BertTokenizer
from model.MedClinical import Biobert_fc 
from sklearn.metrics import classification_report, f1_score
from util.tools  import load_config
from util.DataLoader  import *
from pathlib import Path, PureWindowsPath, PurePosixPath
import traceback
from matplotlib import pyplot as plt
import pandas as pd

In [2]:
# Evaluate on the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# To force CPU evaluation regardless of the hardware: device = "cpu"

In [3]:
# Load the run configuration from the config folder. Later cells read it through
# attribute access (e.g. config.programsettings.OUTPUT_DIR / REPORTS_DIR).
# NOTE(review): "config/" is a relative path, so it is resolved against the kernel's
# working directory.
config_folder = "config/"
config = load_config(config_folder)

In [4]:
# Build the data loader for the split selected by source='test'.
# NOTE(review): the remaining names carry a "dev_" prefix although the test source is
# requested; they are kept unchanged because they are notebook-level globals.
dataprocessor = MultiClassificationProcessor()
(
    test_dataloader,
    dev_data_len,
    dev_num_labels,
    dev_num_train_optimization_steps,
    all_dev_label_ids,
) = dataprocessor.get_data_loader(config, source='test')

100%|███████████████████████████████████████████████████████████████████████████████████| 50010/50010 [09:45<00:00, 85.48it/s]
INFO:root:***** Running training *****
INFO:root:  Num examples = 50010
INFO:root:  Batch size = 24
INFO:root:  Num steps = 20830


In [5]:
def save_missed_cases_to_file(file_name, preds, label_ids, inputs):
    """Pickle every misclassified example of one evaluation run.

    Each stored record is ``[file_name, predicted_label, true_label, text]``, where
    ``text`` is the example's input ids converted back to tokens and joined with
    single spaces. The records are written to a new, timestamped
    ``test_results_<timestamp>.pkl`` file whose path is prefixed with
    ``config.programsettings.REPORTS_DIR`` (``config`` is the notebook-level
    configuration object).

    Args:
        file_name: Identifier stored in every record (the evaluation loop passes the
            model file path with dots replaced by underscores).
        preds: Predicted labels, indexable by example position.
        label_ids: Gold labels; its length determines how many examples are compared.
        inputs: Per-example token id sequences, aligned with ``label_ids``.

    Returns:
        None. The pickle file is written even when nothing was misclassified
        (it then contains an empty list).
    """
    # NOTE(review): assumes the inputs were encoded with the 'bert-base-cased'
    # vocabulary -- confirm against the tokenizer used by the data loader.
    tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)
    missed_cases = [
        [file_name, preds[i], label_ids[i], " ".join(tokenizer.convert_ids_to_tokens(inputs[i]))]
        for i in range(len(label_ids))
        if label_ids[i] != preds[i]
    ]

    # ":" and "." are replaced so the timestamp is usable inside a file name.
    timestamp = str(datetime.now()).replace(":", "_").replace(".", "_")
    missed_cases_file = Path(config.programsettings.REPORTS_DIR + 'test_results_' + timestamp + ".pkl")
    # Create the reports directory on first use instead of failing with FileNotFoundError.
    missed_cases_file.parent.mkdir(parents=True, exist_ok=True)
    with open(missed_cases_file, "wb") as f:
        pickle.dump(missed_cases, f)
        
    

In [6]:
# Evaluate every saved checkpoint (*.bin) in the output directory on the test loader,
# print its classification report, collect the macro F1 score, and dump the
# misclassified examples to a pickle file.
models_folder_name = config.programsettings.OUTPUT_DIR
data_folder = Path(models_folder_name)
# Path.glob yields entries in arbitrary order; sort so the report order is reproducible.
list_model_files = sorted(data_folder.glob('*.bin'))
# NOTE(review): hard-coded label count; presumably it should agree with the value
# returned by the data loader (dev_num_labels) -- confirm.
num_labels = 9

f1_score_list = []
for model_file in list_model_files:
    try:
        print("\n Model file: ", model_file)
        # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine
        # (and vice versa) instead of failing while unpickling.
        # torch.load unpickles arbitrary objects: only load checkpoints from a trusted folder.
        # NOTE(review): recent PyTorch releases default to weights_only=True, which rejects
        # fully pickled models -- confirm against the installed torch version.
        model = torch.load(model_file, map_location=device)
        model.eval()  # switch to inference mode (the return value is the model itself)
        test_inputs, test_preds, test_labels, test_loss = eval_model(
            config, model, test_dataloader, device, num_labels
        )
        print(classification_report(test_labels, test_preds))
        print("\n \n \n")
        f1_value = f1_score(test_labels, test_preds, average='macro')
        f1_score_list.append([model_file, f1_value])
        file_name = str(model_file).replace(".", "_")
        save_missed_cases_to_file(file_name, test_preds, test_labels, test_inputs)
    except Exception:
        # Best effort: report the failure and continue with the next checkpoint.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        traceback.print_exc()

print("\n \n \n")





 Model file:  outputs\re\BioBERT_fc2020-11-25 09_50_29_204646.bin


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=2084.0, style=ProgressStyle(description_…


              precision    recall  f1-score   support

           0       0.96      0.97      0.97      3464
           1       0.98      0.99      0.98      3723
           2       0.99      0.99      0.99      4274
           3       0.99      0.99      0.99      4090
           4       0.95      0.96      0.95       420
           5       0.98      0.96      0.97      3024
           6       0.97      0.98      0.98      2802
           7       0.90      0.84      0.87       775
           8       1.00      1.00      1.00     27438

    accuracy                           0.99     50010
   macro avg       0.97      0.96      0.97     50010
weighted avg       0.99      0.99      0.99     50010


 
 


 Model file:  outputs\re\BioBERT_fc2020-11-25 12_55_42_931790.bin


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=2084.0, style=ProgressStyle(description_…


              precision    recall  f1-score   support

           0       0.96      0.97      0.97      3464
           1       0.97      0.98      0.98      3723
           2       0.99      0.99      0.99      4274
           3       0.99      0.99      0.99      4090
           4       0.94      0.96      0.95       420
           5       0.98      0.96      0.97      3024
           6       0.98      0.98      0.98      2802
           7       0.88      0.85      0.87       775
           8       1.00      1.00      1.00     27438

    accuracy                           0.99     50010
   macro avg       0.97      0.97      0.97     50010
weighted avg       0.99      0.99      0.99     50010


 
 


 Model file:  outputs\re\BioBERT_fc2020-11-25 17_55_47_519417.bin


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=2084.0, style=ProgressStyle(description_…


              precision    recall  f1-score   support

           0       0.95      0.98      0.97      3464
           1       0.97      0.98      0.98      3723
           2       0.99      0.99      0.99      4274
           3       0.99      0.99      0.99      4090
           4       0.95      0.96      0.96       420
           5       0.98      0.96      0.97      3024
           6       0.97      0.98      0.98      2802
           7       0.91      0.83      0.87       775
           8       1.00      1.00      1.00     27438

    accuracy                           0.99     50010
   macro avg       0.97      0.96      0.97     50010
weighted avg       0.99      0.99      0.99     50010


 
 


 Model file:  outputs\re\BioBERT_fc2020-11-25 22_55_31_854296.bin


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=2084.0, style=ProgressStyle(description_…


              precision    recall  f1-score   support

           0       0.96      0.97      0.97      3464
           1       0.98      0.98      0.98      3723
           2       0.99      0.99      0.99      4274
           3       0.99      0.99      0.99      4090
           4       0.95      0.97      0.96       420
           5       0.98      0.96      0.97      3024
           6       0.97      0.98      0.98      2802
           7       0.89      0.86      0.88       775
           8       1.00      1.00      1.00     27438

    accuracy                           0.99     50010
   macro avg       0.97      0.97      0.97     50010
weighted avg       0.99      0.99      0.99     50010


 
 


 
 



In [7]:
# Summarise the macro F1 score of every evaluated checkpoint in a single table.
pd.options.display.max_colwidth = 120  # wide enough to show the full checkpoint path
# NOTE(review): 'file_namae' looks like a misspelling of 'file_name'; it is left
# untouched here because the label is part of the frame's schema.
columns = ['file_namae', 'f1-score']
test_results = pd.DataFrame(data=f1_score_list, columns=columns)
test_results

Unnamed: 0,file_namae,f1-score
0,outputs\re\BioBERT_fc2020-11-25 09_50_29_204646.bin,0.966152
1,outputs\re\BioBERT_fc2020-11-25 12_55_42_931790.bin,0.965632
2,outputs\re\BioBERT_fc2020-11-25 17_55_47_519417.bin,0.966497
3,outputs\re\BioBERT_fc2020-11-25 22_55_31_854296.bin,0.968093


In [8]:
# print ("F1 Score summary \n")  
# ax = plt.gca()
# ax.get_xaxis().set_visible(False)
# ax.get_yaxis().set_visible(False)
# plt.box(on=None)
# plt.figure(figsize=(14,6))
# the_table = plt.table(cellText=f1_score_list,
#                       colLabels=['file name', 'F1-score'],
#                       loc='center')
# the_table.auto_set_font_size(False)
# the_table.set_fontsize(10)