In [91]:
import pandas as pd
from sklearn.metrics import classification_report

In [92]:
# Load the ground-truth token-level annotations and preview the first rows.
true_labels = pd.read_excel('test_true.xlsx')
true_labels.head()

Unnamed: 0,doc_id,token,start_idx,end_idx,entity
0,121116,Admission,0,9,O
1,121116,Date,10,14,O
2,121116,:,14,15,O
3,121116,[,17,18,O
4,121116,*,18,19,O


In [93]:
# Load the model's token-level predictions and preview the first rows.
# NOTE(review): the evaluation below pairs rows by position — presumably this file
# has the same rows in the same order as test_true.xlsx; confirm.
pred_labels = pd.read_excel('test_pred.xlsx')
pred_labels.head()

Unnamed: 0,doc_id,token,start_idx,end_idx,entity
0,121116,Admission,0,9,O
1,121116,Date,10,14,O
2,121116,:,14,15,O
3,121116,[,17,18,O
4,121116,*,18,19,O


In [94]:
# List the distinct tags that occur in the predicted 'entity' column.
pred_labels['entity'].unique()

array(['O', 'B-Drug', 'B-Reason', 'I-Reason', 'B-Strength', 'I-Strength',
       'B-Route', 'B-Frequency', 'I-Frequency', 'I-Drug', 'B-Form',
       'B-Dosage', 'I-Dosage', 'I-Form', 'I-Duration', 'B-Duration',
       'I-Route', 'B-ADE'], dtype=object)

In [95]:
def get_related_tags(true_labels, pred_labels):
    """
    Groups token rows into entity-level groups, driven by the ground-truth tags.

    Both dataframes are read row by row in parallel. A row whose ground-truth
    tag starts with 'I-' is folded into the previous group; any other row
    starts a new group. The prediction row at the same position always follows
    the ground-truth grouping, so the two returned lists have the same length
    and each pair of groups has the same number of elements.

    Example:
        true_gt = [[Admission, O], [Date, O], [eye, B-ADE, discharge, I-ADE], [the, O]]
        pred_gt = [[Admission, O], [Date, O], [eye, B-ADE, discharge, B-Drug], [the, O]]

    Args:
        true_labels: dataframe with 'token' and 'entity' columns (ground truth).
        pred_labels: dataframe with 'token' and 'entity' columns (predictions);
            must have the same number of rows as true_labels.

    Returns:
        (true_gt, pred_gt): two lists of flat groups of the form
        [token, tag, token, tag, ...].

    Raises:
        ValueError: if the two dataframes do not have the same number of rows.
    """

    true_tags = true_labels[['token', 'entity']].values.tolist()
    pred_tags = pred_labels[['token', 'entity']].values.tolist()

    # Rows are paired purely by position, so a length mismatch means the pairing
    # is meaningless; fail loudly instead of truncating or hitting an IndexError.
    if len(true_tags) != len(pred_tags):
        raise ValueError(
            "true_labels and pred_labels must have the same number of rows "
            "(got {} and {})".format(len(true_tags), len(pred_tags))
        )

    true_gt = []
    pred_gt = []

    for (true_token, true_tag), (pred_token, pred_tag) in zip(true_tags, pred_tags):
        # An 'I-' tag continues the previous group. If there is no previous group
        # (the very first row is an 'I-' tag) it starts a group of its own.
        if true_tag.startswith('I-') and true_gt:
            true_gt[-1].extend((true_token, true_tag))
            pred_gt[-1].extend((pred_token, pred_tag))
        else:
            # Build fresh lists so later extends never touch the row lists.
            true_gt.append([true_token, true_tag])
            pred_gt.append([pred_token, pred_tag])

    return true_gt, pred_gt

In [96]:
# Group the token rows into entity-level groups; the two lists should be equally long.
true_gt, pred_gt = get_related_tags(true_labels, pred_labels)
print(len(true_gt), len(pred_gt))

509547 509547


In [97]:
def _entity_type(tag):
    """Strips the BIO prefix from a tag: 'B-Drug' -> 'Drug', 'O' -> 'O'."""
    if tag == 'O':
        return 'O'
    # Split on the first hyphen only, so a hyphenated entity type stays intact;
    # a tag without any prefix is returned unchanged instead of raising.
    _prefix, sep, entity = tag.partition('-')
    return entity if sep else tag


def get_predictions(true_gt, pred_gt):
    """
    Takes the concatenated related tokens and tags lists and returns three lists:
    ground truth, strict predictions and loose predictions.

    Each group is a flat list [token, tag, token, tag, ...]; the label of a
    group is the entity type of its first tag (element 1).

    For every pair of groups:
      * exact match (groups are equal): strict and loose both get the gold type;
      * loose match (more than half of the elements agree position by position):
        loose gets the gold type, strict gets the predicted type;
      * no match: strict and loose both get the predicted type.

    Args:
        true_gt: list of ground-truth groups.
        pred_gt: list of predicted groups, same length as true_gt.

    Returns:
        (gold_entity, pred_strict, pred_loose): three lists of entity types,
        one entry per group.

    Raises:
        ValueError: if true_gt and pred_gt have different lengths.
    """

    if len(true_gt) != len(pred_gt):
        raise ValueError(
            "true_gt and pred_gt must have the same length "
            "(got {} and {})".format(len(true_gt), len(pred_gt))
        )

    gold_entity = []
    pred_strict = []
    pred_loose = []

    for true_group, pred_group in zip(true_gt, pred_gt):
        gold_type = _entity_type(true_group[1])
        pred_type = _entity_type(pred_group[1])
        gold_entity.append(gold_type)

        # exact match
        if true_group == pred_group:
            pred_strict.append(gold_type)
            pred_loose.append(gold_type)
            continue

        # NOTE(review): on a partial match the strict label is the type of the
        # first predicted tag, so it can still equal the gold type even though
        # the groups differ — confirm this is the intended strict scoring.
        pred_strict.append(pred_type)

        # Tokens are compared along with tags. Assuming the tokens of both groups
        # are identical, this majority threshold is reached as soon as at least
        # one tag agrees.
        n_same = sum(1 for t, p in zip(true_group, pred_group) if t == p)
        if n_same >= (len(true_group) // 2) + 1:
            # loose match
            pred_loose.append(gold_type)
        else:
            # no match
            pred_loose.append(pred_type)

    return gold_entity, pred_strict, pred_loose

In [98]:
# Derive one gold, one strict and one loose entity label per group.
gold_entity, pred_strict, pred_loose = get_predictions(true_gt, pred_gt)

In [99]:
# Sanity check: all three label lists must have the same length for classification_report.
print(len(pred_loose), len(pred_strict), len(gold_entity))

509547 509547 509547


### Evaluation on Strict Predictions

In [100]:
# Per-entity-type precision / recall / F1 of the strict labels against the gold labels.
print(classification_report(y_true=gold_entity,  y_pred=pred_strict))

              precision    recall  f1-score   support

         ADE       0.00      0.00      0.00       672
      Dosage       0.86      0.84      0.85      2681
        Drug       0.76      0.78      0.77     10582
    Duration       0.53      0.48      0.50       378
        Form       0.89      0.85      0.87      4359
   Frequency       0.58      0.88      0.70      4012
           O       0.99      0.99      0.99    476530
      Reason       0.35      0.48      0.40      2590
       Route       0.83      0.79      0.81      3513
    Strength       0.88      0.89      0.89      4230

    accuracy                           0.97    509547
   macro avg       0.67      0.70      0.68    509547
weighted avg       0.97      0.97      0.97    509547



### Evaluation on Loose Predictions

In [101]:
# Per-entity-type precision / recall / F1 of the loose labels against the gold labels.
print(classification_report(y_true=gold_entity,  y_pred=pred_loose))

              precision    recall  f1-score   support

         ADE       0.75      0.00      0.01       672
      Dosage       0.86      0.84      0.85      2681
        Drug       0.76      0.78      0.77     10582
    Duration       0.60      0.63      0.62       378
        Form       0.89      0.86      0.87      4359
   Frequency       0.59      0.89      0.71      4012
           O       0.99      0.99      0.99    476530
      Reason       0.37      0.51      0.43      2590
       Route       0.83      0.79      0.81      3513
    Strength       0.89      0.90      0.89      4230

    accuracy                           0.97    509547
   macro avg       0.75      0.72      0.70    509547
weighted avg       0.98      0.97      0.97    509547

