In [1]:
# Imports
import json
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [2]:
# Define your paths and threshold here
# Every input/output location is derived from PROJECT_DIR, so relocating the
# project only requires changing this one line.
PROJECT_DIR = "/content/drive/MyDrive/266_final_project"

# Tab-separated ground truth; loaded without a header row as (text, labels, id).
TEST_DATA_PATH = f"{PROJECT_DIR}/data/test.tsv"
# Tab-separated predictions; loaded with a header row and indexed by emotion name.
PREDICTIONS_PATH = f"{PROJECT_DIR}/data/predictions.tsv"
# Plain-text list of emotion names, one per line.
EMOTION_FILE = f"{PROJECT_DIR}/data/emotions.txt"
# When True, "neutral" is appended to the emotion list after it is read.
ADD_NEUTRAL = True
# A score counts as a positive prediction only when it is strictly greater than this.
THRESHOLD = 0.3
# Destination of the JSON metrics file.
OUTPUT_JSON_PATH = f"{PROJECT_DIR}/results/results.json"  # Optional

In [3]:
# Load data
# Predictions are read with a header row; one score column per emotion name is
# looked up below.
preds = pd.read_csv(PREDICTIONS_PATH, sep="\t")
# Ground truth has no header row. `labels` is forced to str: if every row held a
# single label, pandas would otherwise infer an integer column and the
# comma-split below would fail.
true = pd.read_csv(
    TEST_DATA_PATH,
    sep="\t",
    header=None,
    names=["text", "labels", "id"],
    dtype={"labels": str},
)
# The context manager closes the file handle; blank lines are dropped so they
# cannot turn into empty emotion names.
with open(EMOTION_FILE) as emotion_file:
    emotions = [name for name in emotion_file.read().splitlines() if name.strip()]
if ADD_NEUTRAL:
    emotions.append("neutral")

num_emotions = len(emotions)
idx2emotion = dict(enumerate(emotions))

# Rows of the two files are paired by position, so a length mismatch would
# score predictions against the wrong examples. Fail loudly instead.
if len(preds) != len(true):
    raise ValueError(
        f"Row count mismatch: {len(preds)} prediction rows vs {len(true)} ground-truth rows"
    )
missing_columns = [name for name in emotions if name not in preds.columns]
if missing_columns:
    raise KeyError(f"Predictions file has no column for: {missing_columns}")

# Create binary ground truth and prediction matrices
# Selecting the columns in `emotions` order makes column j correspond to idx2emotion[j].
preds_mat = preds[emotions].to_numpy(dtype=float)
true_mat = np.zeros((len(true), num_emotions))
for row, label_field in enumerate(true["labels"]):
    for label_idx in map(int, label_field.split(",")):
        # Label ids outside the emotion list are ignored rather than treated as errors.
        if 0 <= label_idx < num_emotions:
            true_mat[row, label_idx] = 1

# Binarize predictions
# Strict comparison: a score exactly equal to THRESHOLD is a negative.
pred_bin = (preds_mat > THRESHOLD).astype(int)

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/266_final_project/data/predictions.tsv'

In [4]:
# Calculate metrics
# `results` maps metric name -> score. Overall metrics are inserted first,
# followed by four entries (accuracy, precision, recall, f1) per emotion.
results = {}

# Overall scores
# NOTE: on multilabel indicator matrices, accuracy_score is exact-match (subset)
# accuracy: a row only counts as correct when every emotion column matches.
results["accuracy"] = accuracy_score(true_mat, pred_bin)
for average in ("macro", "micro", "weighted"):
    # zero_division=0 matches the per-emotion call below: undefined scores
    # (no positive predictions or no positive labels) are reported as 0
    # without emitting a warning.
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_mat, pred_bin, average=average, zero_division=0
    )
    results[f"{average}_precision"] = precision
    results[f"{average}_recall"] = recall
    results[f"{average}_f1"] = f1

# Per-emotion scores
# Each column is scored on its own as a binary problem.
for i in range(num_emotions):
    emotion = idx2emotion[i]
    emotion_true = true_mat[:, i]
    emotion_pred = pred_bin[:, i]
    results[f"{emotion}_accuracy"] = accuracy_score(emotion_true, emotion_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(
        emotion_true, emotion_pred, average="binary", zero_division=0
    )
    results[f"{emotion}_precision"] = precision
    results[f"{emotion}_recall"] = recall
    results[f"{emotion}_f1"] = f1

NameError: name 'true_mat' is not defined

In [4]:
# Display results
# One "name: value" line per metric, in insertion order, with the value shown
# to four decimal places.
for metric_name in results:
    metric_value = results[metric_name]
    print(f"{metric_name}: {metric_value:.4f}")

In [None]:
# Save to JSON (optional)
# The export is skipped when OUTPUT_JSON_PATH is empty or None, so the cell
# can stay in a Run-All without forcing a file to be written.
if OUTPUT_JSON_PATH:
    output_path = Path(OUTPUT_JSON_PATH)
    # Create the destination directory if needed; open(..., "w") does not
    # create missing parent directories.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"\nSaved results to {OUTPUT_JSON_PATH}")
else:
    print("\nOUTPUT_JSON_PATH is not set; skipping JSON export")

Example output (illustrative values; each metric is printed to four decimal places):

- accuracy: 0.8427
- macro_f1: 0.4912
- anger_precision: 0.5600
- anger_recall: 0.4900
- anger_f1: 0.5200