In [None]:
import json
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plotting defaults: seaborn's "whitegrid" style and a (6, 4)
# default figure size. Cells that need another size pass their own figsize.
sns.set(style="whitegrid")
plt.rcParams["figure.figsize"] = (6, 4)


In [None]:
def load_json(path):
    """Read a UTF-8 encoded JSON file and return its decoded content.

    Args:
        path: Location of the JSON file (string or path-like).

    Returns:
        The decoded JSON value, or ``None`` when ``path`` does not point to a
        regular file (it is missing, or it is a directory).

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    path = Path(path)
    # is_file() instead of exists(): a directory at this path would pass an
    # exists() check and then make open() raise instead of returning None.
    if not path.is_file():
        return None
    with path.open("r", encoding="utf-8") as f:
        return json.load(f)

# Accumulator for the per-run metric records (one dict per run) that are
# later turned into a DataFrame.
rows = []


In [None]:
# Gather the saved metrics of every run into one DataFrame, one row per run.
base_a = Path("../task_a/results/logs")
base_b = Path("../task_b/results/logs")
base_c = Path("../task_c/results/logs")

# Each entry: (run label, task id, path of that run's metrics JSON).
run_specs = [
    # Task A
    ("A-TFIDF-LogReg", "A", base_a / "tfidf_baseline_metrics.json"),
    ("A-XGBoost", "A", base_a / "xgb_baseline_metrics.json"),
    ("A-Transformer-GraphCodeBERT", "A", base_a / "transformer_graphcodebert_metrics.json"),
    ("A-Ensemble", "A", base_a / "ensemble_metrics.json"),
    # Task B
    ("B-TFIDF+SVM", "B", base_b / "task_b_tfidf_svm_metrics.json"),
    ("B-Transformer-CodeBERT", "B", base_b / "transformer_codebert_metrics.json"),
    ("B-Transformer-CodeT5", "B", base_b / "transformer_codet5_metrics.json"),
    # Task C
    ("C-Token-Transformer", "C", base_c / "task_c_metrics.json"),
]

# Rebuild the list from scratch so that re-running this cell never appends
# the same runs a second time.
rows = []
for name, task, metrics_path in run_specs:
    m = load_json(metrics_path)
    if m is None:
        # load_json returned nothing for this run: leave it out of the table.
        continue
    # Assumes each metrics file holds a JSON object (dict). On a key clash the
    # file's own "run"/"task" values override the labels given here.
    rows.append({"run": name, "task": task} | m)

metrics_df = pd.DataFrame(rows)
metrics_df


In [None]:
# Horizontal bar chart of macro-F1 for every loaded run, rows sorted by
# macro-F1 in descending order and bars coloured by task.
fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(
    data=metrics_df.sort_values("macro_f1", ascending=False),
    x="macro_f1",
    y="run",
    hue="task",
    # Each run belongs to a single task, so hue is nested inside y. Dodging
    # would reserve a slot per task for every run and draw thin bars that are
    # offset from their tick labels; keep one full-width bar per run.
    dodge=False,
    ax=ax,
)
ax.set_title("Macro-F1 comparison across all models and tasks")
ax.set_xlabel("Macro-F1")
ax.set_ylabel("Run")
ax.set_xlim(0, 1)
ax.legend(title="Task")
fig.tight_layout()
plt.show()


In [None]:
# Best run of each task: rank all runs by macro-F1 (highest first), keep the
# first row seen for every task, then renumber the index from zero.
ranked_runs = metrics_df.sort_values("macro_f1", ascending=False)
top_run_rows = ranked_runs.groupby("task").head(1)
best_per_task = top_run_rows.reset_index(drop=True)
best_per_task
