# Visualize the predictions and the labels

In [1]:
import pandas as pd
import ast

prediction_path = "../predictions/predictions_NERLens.csv"
label_path = "l-ner-data/test_NERLens with labels post annotator2.csv"

# Load CSV files
preds_df = pd.read_csv(prediction_path)
labels_df = pd.read_csv(label_path)

# Parse the string representations of lists in 'tokens' and 'ner_tags' columns
preds_df['tokens'] = preds_df['tokens'].apply(ast.literal_eval)
preds_df['ner_tags'] = preds_df['ner_tags'].apply(ast.literal_eval)
labels_df['ner_tags'] = labels_df['ner_tags'].apply(ast.literal_eval)

# Visualize the tokens, predicted tags, and actual labels
print(f"{'Token':_<40} {'Predicted tag':_<40} {'Label'}")
for i in range(len(preds_df)):
    tokens = preds_df['tokens'].iloc[i]
    predicted_tags = preds_df['ner_tags'].iloc[i]
    labels = labels_df['ner_tags'].iloc[i]
    
    for token, tag, label in zip(tokens, predicted_tags, labels):
        print(f"{token:_<40} {tag:_<40} {label}")
    print("-----------------------------------------------------------")


Token___________________________________ Predicted tag___________________________ Label
a_______________________________________ O_______________________________________ O
class___________________________________ O_______________________________________ O
action__________________________________ O_______________________________________ O
lawsuit_________________________________ O_______________________________________ O
has_____________________________________ O_______________________________________ O
been____________________________________ O_______________________________________ O
filed___________________________________ O_______________________________________ O
on______________________________________ O_______________________________________ B-LAW
behalf__________________________________ O_______________________________________ I-LAW
of______________________________________ O_______________________________________ O
all_____________________________________ O______________________

# Evaluate results on each tag

In [2]:
from sklearn.metrics import classification_report
import pandas as pd
import ast
import numpy as np

# Merge the predictions and labels dataframes on the 'id' column to ensure proper alignment
merged_df = pd.merge(labels_df[['id', 'ner_tags']], preds_df[['id', 'ner_tags']], on='id', suffixes=('_true', '_pred'))

# Flatten the true and predicted NER tags into single lists
flattened_true_tags = [tag for tags in merged_df['ner_tags_true'] for tag in tags]
flattened_pred_tags = [tag for tags in merged_df['ner_tags_pred'] for tag in tags]

# Define the tags to include in the report, excluding "O"
labels_to_include = list(set(flattened_true_tags) - {'O'})  # Unique tags without "O"

# Generate the classification report, excluding the "O" tag
report = classification_report(
    flattened_true_tags,
    flattened_pred_tags,
    labels=labels_to_include,  # Only include the specified tags
    digits=4
)

# Print the classification report
print(report)

               precision    recall  f1-score   support

B-VIOLATED BY     0.0894    0.0401    0.0554       399
I-VIOLATED ON     0.6206    0.2855    0.3911       865
I-VIOLATED BY     0.1145    0.0572    0.0763       594
B-VIOLATED ON     0.5106    0.2807    0.3623       342
  B-VIOLATION     0.8138    0.7152    0.7613       446
        I-LAW     0.9299    0.6868    0.7901       811
        B-LAW     0.8871    0.6707    0.7639       246
  I-VIOLATION     0.9021    0.7520    0.8202      7287

    micro avg     0.8316    0.6291    0.7163     10990
    macro avg     0.6085    0.4360    0.5026     10990
 weighted avg     0.7938    0.6291    0.6984     10990

