# Evaluation of the final BART model

In [4]:
from typing import List
from src.bart.BartModel import BartCustomModel
from seqeval.metrics import classification_report
from src.util.utils import load_bart_data,transform_to_iob
from pandas import DataFrame

# Location of the saved BART model; passed as `model_path` to BartCustomModel.
# NOTE(review): all paths are relative, presumably to the notebook's working
# directory -- confirm before running from elsewhere.
path_bart: str = "../Saved_Models/BART"
# CSV files for the train / validation / test data handed to load_bart_data.
path_train: str = "../../data/bart_train_data_single.csv"
path_validation: str = "../../data/bart_dev_data_single.csv"
path_test: str = "../../data/bart_test_data.csv"

### Load the Data

In [5]:
# Only the third value returned by load_bart_data is kept (the test data);
# the first two are discarded (presumably the train and validation data -- confirm).
_, _, bart_test = load_bart_data(path_train, path_validation, path_test)

In [14]:
# Preview the first rows of the test data to check the loaded columns.
bart_test.head()

Unnamed: 0,input_text,entities,entity names
0,"True, our Constitution has no 'due process' cl...","[Constitution, R.C. Cooper v. Union of India, ...","[statute, precedent, precedent]"
1,(See Principles of Statutory Interpretation by...,[G.P. Singh],[judge]
2,"Their Lordships have said -- ""It is a sound ru...","[England, Heydon]","[location, person]"
3,"In para 13 of the plaint, it has been further ...","[29/12/2004, Delhi]","[date, location]"
4,Counsel for appellants contended that who is t...,"[Rule 2, Section 172, Hyderabad Land Revenue A...","[provision, provision, statute]"


In [15]:
# Collect every distinct entity label that occurs in the test data.
# Each row of "entity names" holds a list of labels, so the column is flattened
# to one label per row before de-duplicating (order of first appearance is kept).
flattened_entity_names = bart_test["entity names"].explode()
bart_unique_classes: List = flattened_entity_names.unique().tolist()
bart_unique_classes

['statute',
 'precedent',
 'judge',
 'location',
 'person',
 'date',
 'provision',
 'case number',
 'curt',
 'no entities found',
 'organization',
 'petitioner',
 'witness',
 'respondent',
 'lawyer']

In [16]:
# Instantiate the model from the saved location, handing it the label set
# collected from the test data.
bart_model: BartCustomModel = BartCustomModel(
    model_path=path_bart,
    all_classes=bart_unique_classes,
)

In [17]:
# Run the model over every input text of the test data.
predictions, classes = bart_model.predict_single(texts=bart_test["input_text"])

# Each predicted entity is a sequence of string pieces; join the pieces with
# single spaces so every entity becomes one plain string per text.
unwrapped_predictions = [
    [" ".join(entity_pieces) for entity_pieces in text_prediction]
    for text_prediction in predictions
]

Prediction:   0%|          | 0/1074 [00:00<?, ?it/s]

# Transform the gold annotations and the predictions to IOB (BIO) format to run the evaluation

In [20]:
# Convert the gold annotations of the test data into IOB tag sequences.
gold_input_texts = bart_test["input_text"].to_list()
gold_entity_spans = bart_test["entities"].to_list()
gold_entity_names = bart_test["entity names"].to_list()

bart_iob_gold_entities, gold_texts = transform_to_iob(
    texts=gold_input_texts,
    entities=gold_entity_spans,
    names=gold_entity_names,
)

In [21]:
# Convert the model output into IOB tag sequences, using the same input texts
# as for the gold annotations so both tag sequences refer to the same texts.
predicted_input_texts = bart_test["input_text"].to_list()

bart_prediction_iob, prediction_texts = transform_to_iob(
    texts=predicted_input_texts,
    entities=unwrapped_predictions,
    names=classes,
)

# Run Evaluation

In [23]:
# Score the predicted IOB tags against the gold IOB tags. With output_dict=True
# the report is returned as a dict rather than a formatted string, so it can be
# loaded into a DataFrame.
result = classification_report(
    bart_iob_gold_entities,
    bart_prediction_iob,
    output_dict=True,
)

# One column per entity class plus the micro / macro / weighted averages;
# rows are precision, recall, f1-score and support.
d = DataFrame.from_dict(result)
d.head(10)

Unnamed: 0,CASENUMBER,CURT,DATE,JUDGE,LAWYER,LOCATION,ORGANIZATION,PERSON,PETITIONER,PRECEDENT,PROVISION,RESPONDENT,STATUTE,WITNESS,micro avg,macro avg,weighted avg
precision,0.604938,0.827586,0.861925,0.916667,0.910448,0.578035,0.470588,0.837638,0.758621,0.578378,0.824138,0.701863,0.75502,0.919355,0.750541,0.753229,0.774691
recall,0.837607,0.83045,0.953704,0.661654,0.405765,0.724638,0.693333,0.897233,0.717391,0.629412,0.952191,0.45749,0.917073,0.982759,0.727463,0.761479,0.727463
f1-score,0.702509,0.829016,0.905495,0.768559,0.56135,0.643087,0.560647,0.866412,0.73743,0.602817,0.883549,0.553922,0.828194,0.95,0.738822,0.742356,0.729012
support,117.0,289.0,216.0,133.0,451.0,138.0,150.0,253.0,184.0,170.0,251.0,247.0,205.0,58.0,2862.0,2862.0,2862.0
