#### Dependencies

In [None]:
import pandas as pd
from sklearn.metrics import classification_report, precision_recall_fscore_support, hamming_loss, jaccard_score
from sklearn.preprocessing import MultiLabelBinarizer
import warnings
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)

#### Data

In [None]:
# Load the evaluation data from a CSV file in the working directory.
# Later cells read two columns from this frame: "classification" (parsed into
# "pred_labels") and "FinalLabels" (parsed in place and used as ground truth).
data = pd.read_csv('input-file.csv')

In [None]:
def parse_labels(label_str):
    """Split a comma-separated label string into a list of cleaned labels.

    Each label has surrounding whitespace removed and underscores replaced
    by spaces. Blank fragments (for example from a trailing or doubled
    comma) are dropped instead of being returned as empty labels.

    Args:
        label_str: Raw cell value. Usually a string such as ``"a_b, c"``;
            missing values (``None``/NaN) and blank strings are accepted,
            as is an already-parsed list, which makes the function safe to
            apply twice to the same column.

    Returns:
        list[str]: The cleaned labels in their original order; empty when
        there is nothing to parse.
    """
    if isinstance(label_str, (list, tuple)):
        # Already parsed (e.g. the cell was re-run on an overwritten column).
        # Checked first because pd.isna() on a sequence returns an array,
        # which cannot be used as a truth value.
        parts = label_str
    elif pd.isna(label_str):
        return []
    else:
        # str() keeps non-string scalars (e.g. a numeric label) from crashing.
        parts = str(label_str).split(",")

    cleaned_labels = []
    for part in parts:
        cleaned = str(part).strip().replace("_", " ")
        if cleaned:
            cleaned_labels.append(cleaned)

    return cleaned_labels


In [None]:
# Turn the comma-separated label strings into lists of cleaned labels.
# The parsed "classification" column is stored in a new "pred_labels" column,
# while "FinalLabels" is overwritten in place, so re-running this cell passes
# already-parsed lists (not strings) to parse_labels.
data["pred_labels"] = data["classification"].apply(parse_labels)
data["FinalLabels"] = data["FinalLabels"].apply(parse_labels)

In [None]:
def normalize_label(label):
    """Return ``label`` without surrounding whitespace and in lower case.

    Used to compare labels case- and padding-insensitively.
    """
    trimmed = label.strip()
    return trimmed.lower()

In [None]:
def get_first_label(label):
    """Return the first entry of a label list, used as the row's category.

    Args:
        label: A list of labels, or any other value.

    Returns:
        The first element when ``label`` is a non-empty list, ``None`` when
        it is an empty list, and ``label`` unchanged when it is not a list.
    """
    if not isinstance(label, list):
        return label
    # An empty list means "no label". Return None rather than the list itself:
    # a list stored as a category value is unhashable and makes
    # Series.unique() raise TypeError.
    return label[0] if label else None

# The first ground-truth label of each row is treated as that row's category.
data["true_cat"] = data["FinalLabels"].apply(get_first_label)

# Only string values are real categories. A row without a ground-truth label
# holds a placeholder (an empty list or a missing value) instead; it must not
# be reported as a category, and an empty list would make unique() raise
# because lists are unhashable.
has_category = data["true_cat"].apply(lambda value: isinstance(value, str)).astype(bool)
unique_categories = data.loc[has_category, "true_cat"].unique()

#### Detection Counts

In [None]:
# For every ground-truth category, count how many rows carry it ("actual") and
# on how many of those rows the model's predictions contain it ("detected").
# Predictions are matched after normalize_label(), i.e. ignoring case and
# surrounding whitespace.
report = {}
normalized_categories = {}
for cat in unique_categories:
    # Placeholders for unlabelled rows are not categories and cannot be normalised.
    if not isinstance(cat, str):
        continue
    report[cat] = {"actual": 0, "detected": 0}
    normalized_categories[cat] = normalize_label(cat)

# Single pass over the rows instead of rescanning the frame once per category.
for true_cat, preds in zip(data["true_cat"], data["pred_labels"]):
    # The isinstance check comes first so unhashable values never reach the dict lookup.
    if not isinstance(true_cat, str) or true_cat not in report:
        continue

    counts = report[true_cat]
    counts["actual"] += 1

    target = normalized_categories[true_cat]
    if isinstance(preds, list) and any(normalize_label(p) == target for p in preds):
        counts["detected"] += 1

print("Report:")
for cat, counts in report.items():
    print(f"{cat}: Actual: {counts['actual']} | Model Detected: {counts['detected']}")


#### Precision, Recall, F1-Score

In [None]:
# Canonical form of a label for comparisons: surrounding whitespace removed,
# then converted to lower case.
def normalize(label):
    """Return ``label`` stripped of surrounding whitespace and lower-cased."""
    stripped = label.strip()
    return stripped.lower()

# Per-category precision / recall / F1 of the predicted labels.
# NOTE: a row's true category is only its first ground-truth label, so a
# prediction that matches a secondary ground-truth label counts as "predicted"
# but not as a true positive.
report_metrics = {}  # raw category label -> counts and scores

# Only string values are real categories; rows without a ground-truth label
# hold a placeholder (empty list / missing value) that cannot be normalised
# and would make unique() raise when it is an unhashable list.
has_category = data["true_cat"].apply(lambda value: isinstance(value, str)).astype(bool)
unique_categories = data.loc[has_category, "true_cat"].unique()

# Normalise once, outside the category loop: neither sequence depends on the
# category being scored. Unlabelled rows get None so they never match.
true_list = [
    normalize(value) if isinstance(value, str) else None
    for value in data["true_cat"]
]
# One set of normalised predictions per row (empty when the cell is not a list).
pred_list = [
    {normalize(p) for p in preds} if isinstance(preds, list) else set()
    for preds in data["pred_labels"]
]

for cat in unique_categories:
    norm_cat = normalize(cat)

    actual_count = 0     # rows whose true category is this one
    predicted_count = 0  # rows whose predictions contain this category
    tp = 0               # rows where both of the above hold
    for true_label, preds in zip(true_list, pred_list):
        is_actual = true_label == norm_cat
        is_predicted = norm_cat in preds
        if is_actual:
            actual_count += 1
        if is_predicted:
            predicted_count += 1
        if is_actual and is_predicted:
            tp += 1

    # Each score falls back to 0 when its denominator is 0.
    precision = tp / predicted_count if predicted_count > 0 else 0
    recall = tp / actual_count if actual_count > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

    report_metrics[cat] = {
        "actual": actual_count,
        "predicted": predicted_count,
        "tp": tp,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }

print("Evaluation Report:")
for cat, metrics in report_metrics.items():
    print(f"Category: {cat}")
    print(f"  Actual: {metrics['actual']}")
    print(f"  Model Predicted: {metrics['predicted']}")
    print(f"  True Positives: {metrics['tp']}")
    print(f"  Precision: {metrics['precision']:.2f}")
    print(f"  Recall: {metrics['recall']:.2f}")
    print(f"  F1 Score: {metrics['f1']:.2f}\n")
