In [2]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score

# ─── 0. CONFIG ────────────────────────────────────────────────────────────────
# Prediction CSV for every (model, season) pair. The sections below read the
# columns "true"/"pred" from the VGG16 files and "true_label"/"pred_label"
# from the ViT files.
results = {
    "VGG16": {
        "March":   r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_March\Results\vgg16\tree_classes\confidence_scores.csv",
        "July":    r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\vgg16\confidence_scores.csv",
        "October": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\vgg16\confidence_scores.csv",
    },
    "ViT": {
        "March":   r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_March\Results\ViT\predictions.csv",
        "July":    r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\ViT\predictions_output.csv",
        "October": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\ViT\predictions_output.csv",
    },
}

# Folder that receives the PNG figures; created here if it does not exist yet.
out_dir = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\comparison_between_seasons"
os.makedirs(out_dir, exist_ok=True)

# Category orders handed to the bar plots (x-axis seasons, hue models).
season_order = ["March", "July", "October"]
model_hue_order = ["ViT", "VGG16"]

# One fixed colour per model, used by both bar charts
palette = {
    "ViT":   "#27619e",   # blue
    "VGG16": "#228238",   # green
}

# whitegrid theme
sns.set_theme(style="whitegrid")

# ─── 1. MACRO-F1 BAR CHART ─────────────────────────────────────────────────────
# One row per (model, season) with the macro-averaged F1 of that prediction file.
macro_rows = []
for model_name, season_paths in results.items():
    # The VGG16 and ViT exports name their label columns differently.
    true_col, pred_col = (
        ("true", "pred") if model_name == "VGG16" else ("true_label", "pred_label")
    )
    for season_name, csv_path in season_paths.items():
        preds = pd.read_csv(csv_path)
        truth = preds[true_col]
        # Average only over the classes that occur in the ground truth.
        score = f1_score(
            truth,
            preds[pred_col],
            labels=sorted(truth.unique()),
            average="macro",
        )
        macro_rows.append(
            {"Model": model_name, "Season": season_name, "Macro F1": score}
        )

macro_df = pd.DataFrame(macro_rows)

# Grouped bars: seasons on the x-axis, one bar per model.
fig, ax = plt.subplots(figsize=(7, 4))
sns.barplot(
    data=macro_df,
    x="Season",
    y="Macro F1",
    hue="Model",
    order=season_order,
    hue_order=model_hue_order,
    palette=palette,
    ax=ax,
)
ax.set_ylim(0, 1)
ax.set_ylabel("Macro F1")
ax.set_title("Macro F1 by Model & Season")
ax.legend(title="", loc="upper left")
fig.tight_layout()
fig.savefig(
    os.path.join(out_dir, "macro_f1_by_season.png"),
    dpi=300,
    bbox_inches="tight",
)
plt.close(fig)

# ─── 2. PER-CLASS ΔF1 (Oct − July) ────────────────────────────────────────────
# One row per (model, class): October F1 minus July F1.
delta_rows = []
for model_name, season_paths in results.items():
    true_col, pred_col = (
        ("true", "pred") if model_name == "VGG16" else ("true_label", "pred_label")
    )
    july = pd.read_csv(season_paths["July"])
    october = pd.read_csv(season_paths["October"])
    jul_true, jul_pred = july[true_col], july[pred_col]
    oct_true, oct_pred = october[true_col], october[pred_col]

    # Only classes with ground-truth samples in both seasons can be compared.
    shared_classes = sorted(set(jul_true) & set(oct_true))
    for cls in shared_classes:
        # Restricting `labels` to a single class makes the macro average equal
        # to that class's own F1.
        july_f1 = f1_score(jul_true, jul_pred, labels=[cls], average="macro")
        october_f1 = f1_score(oct_true, oct_pred, labels=[cls], average="macro")
        delta_rows.append(
            {"Model": model_name, "Class": cls, "Delta F1": october_f1 - july_f1}
        )

delta_df = pd.DataFrame(delta_rows)

fig, ax = plt.subplots(figsize=(9, 4))
sns.barplot(
    data=delta_df,
    x="Class",
    y="Delta F1",
    hue="Model",
    hue_order=model_hue_order,
    palette=palette,
    ax=ax,
)
# Zero line separates classes that improved from those that got worse.
ax.axhline(0, color="k", linewidth=0.8)
plt.xticks(rotation=45, ha="right")
ax.set_ylabel("Δ F1 (October − July)")
ax.set_title("Change in Per-Class F1 from July to October")
ax.legend(title="", loc="lower left")
fig.tight_layout()
fig.savefig(
    os.path.join(out_dir, "delta_f1_per_class.png"),
    dpi=300,
    bbox_inches="tight",
)
plt.close(fig)


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import f1_score, confusion_matrix

# === 0. CONFIGURATION ===
# Input files per model and season:
#   "pred_csv"    - prediction table read by the F1 sections (columns
#                   "true"/"pred" for the "vgg16" key, "true_label"/"pred_label"
#                   for every other key)
#   "confmat_csv" - saved confusion matrix read by the difference section
# Fill in your actual file paths here
results = {
    "vgg16": {
        "July": {
            "pred_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\vgg16\confidence_scores.csv",
            "confmat_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\vgg16\benchmark_confusion_matrix.csv"
        },
        "October": {
            "pred_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\vgg16\confidence_scores.csv",
            "confmat_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\vgg16\analysis\confusion_matrix.csv"
        }
    },
    "ViT": {
        "July": {
            "pred_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\ViT\predictions_output.csv",
            "confmat_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\ViT\confusion_matrix.csv"
        },
        "October": {
            "pred_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\ViT\predictions_output.csv",
            "confmat_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\ViT\analysis\confusion_matrix.csv"
        }
    }
}

# Folder for every CSV table and PNG figure written by this cell; created if
# it does not exist yet.
output_dir = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\comparison"
os.makedirs(output_dir, exist_ok=True)

# === 1. AGGREGATE MACRO-F1 TABLE ===
# One record per (model, season), saved as CSV and drawn as grouped bars.
macro_f1 = []
for model_name, season_files in results.items():
    # The vgg16 and ViT exports name their label columns differently.
    true_col, pred_col = (
        ("true", "pred") if model_name == "vgg16" else ("true_label", "pred_label")
    )
    for season_name, files in season_files.items():
        preds = pd.read_csv(files["pred_csv"])
        # No `labels=` is passed, so the average is taken over whatever label
        # set f1_score derives from the data by default.
        score = f1_score(preds[true_col], preds[pred_col], average="macro")
        macro_f1.append(
            {"model": model_name, "season": season_name, "macro_f1": score}
        )

macro_df = pd.DataFrame(macro_f1)
macro_df.to_csv(os.path.join(output_dir, "macro_f1_comparison.csv"), index=False)

# Plot grouped bar
fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(
    data=macro_df,
    x="season",
    y="macro_f1",
    hue="model",
    palette=["blue", "green"],
    ax=ax,
)
ax.set_ylim(0, 1)
ax.set_ylabel("Macro F₁ Score")
ax.set_title("Macro F₁ by Model and Season")
fig.tight_layout()
fig.savefig(os.path.join(output_dir, "macro_f1_by_season.png"))
plt.close(fig)

# === 2. PER-CLASS F1 HEATMAP ===
# For every model: a classes x seasons table of per-class F1, saved as CSV and
# rendered as an annotated heatmap.
for model, seasons in results.items():
    # The vgg16 and ViT exports name their label columns differently.
    true_col, pred_col = (
        ("true", "pred") if model == "vgg16" else ("true_label", "pred_label")
    )
    f1_matrix = {}
    for season, paths in seasons.items():
        df = pd.read_csv(paths["pred_csv"])
        y_true, y_pred = df[true_col], df[pred_col]
        season_classes = sorted(y_true.unique())
        # Key every score by its class label. Plain positional lists break as
        # soon as two seasons contain different class sets: the frame either
        # fails to build (length mismatch) or shows scores under wrong classes.
        f1_matrix[season] = pd.Series(
            # A single-label "macro" average is that class's own F1.
            [f1_score(y_true, y_pred, labels=[c], average="macro") for c in season_classes],
            index=season_classes,
            dtype=float,
        )
    # Series are aligned on their index; a class that is absent from a season
    # becomes NaN and is left blank in the heatmap.
    heat_df = pd.DataFrame(f1_matrix)
    heat_df.to_csv(os.path.join(output_dir, f"{model}_per_class_f1.csv"))
    plt.figure(figsize=(6,5))
    sns.heatmap(heat_df, annot=True, fmt=".2f", cmap="YlGnBu", vmin=0, vmax=1)
    plt.title(f"{model} Per-Class F₁ Across Seasons")
    plt.ylabel("Class")
    plt.xlabel("Season")
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f"{model}_per_class_f1_heatmap.png"))
    plt.close()

# === 3. DELTA F1 BAR CHART ===
# October minus July F1 for every class, one record per (model, class).
delta = []
for model_name in results:
    # Per-class table written by the heatmap section: classes as index, one
    # column per season.
    per_class = pd.read_csv(
        os.path.join(output_dir, f"{model_name}_per_class_f1.csv"),
        index_col=0,
    )
    change = per_class["October"] - per_class["July"]
    delta.extend(
        {"model": model_name, "class": cls, "delta_f1": dv}
        for cls, dv in change.items()
    )

delta_df = pd.DataFrame(delta)
delta_df.to_csv(os.path.join(output_dir, "delta_f1.csv"), index=False)

fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(
    data=delta_df,
    x="class",
    y="delta_f1",
    hue="model",
    palette=["blue", "green"],
    ax=ax,
)
# Zero line separates gains from losses.
ax.axhline(0, color="black", linewidth=0.8)
ax.set_ylabel("Δ F₁ (Oct − July)")
plt.xticks(rotation=45, ha="right")
ax.set_title("Change in Per-Class F₁ from July to October")
fig.tight_layout()
fig.savefig(os.path.join(output_dir, "delta_f1_per_class.png"))
plt.close(fig)

# === 4. CONFUSION MATRIX DIFFERENCE ===
# For every model: October confusion matrix minus July confusion matrix,
# saved as CSV and drawn as a diverging heatmap centred on zero.
for model, seasons in results.items():
    cm_j = pd.read_csv(seasons["July"]["confmat_csv"], index_col=0)
    cm_o = pd.read_csv(seasons["October"]["confmat_csv"], index_col=0)

    # Put both matrices on one common label grid before subtracting. A label
    # missing from one season is treated as a row/column of zero counts;
    # without this, pandas fills the non-overlapping cells with NaN, the
    # result turns float, and the integer annotation format below fails.
    row_labels = cm_j.index.union(cm_o.index)
    col_labels = cm_j.columns.union(cm_o.columns)
    cm_j = cm_j.reindex(index=row_labels, columns=col_labels, fill_value=0)
    cm_o = cm_o.reindex(index=row_labels, columns=col_labels, fill_value=0)

    diff = cm_o - cm_j
    diff.to_csv(os.path.join(output_dir, f"{model}_confmat_diff.csv"))

    # "d" can only format integers; fall back to two decimals if the stored
    # matrices hold non-integer values.
    all_integer = all(pd.api.types.is_integer_dtype(dt) for dt in diff.dtypes)
    plt.figure(figsize=(6,5))
    sns.heatmap(diff, annot=True, center=0, cmap="vlag", fmt="d" if all_integer else ".2f")
    plt.title(f"{model} Confusion Matrix Difference (Oct−July)")
    plt.ylabel("True")
    plt.xlabel("Predicted")
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f"{model}_confmat_diff.png"))
    plt.close()



In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import f1_score, confusion_matrix

# ─── 0. CONFIGURATION ────────────────────────────────────────────────────────
# Input files per model and season:
#   "pred_csv"    - prediction table read by the F1 sections
#   "confmat_csv" - saved confusion matrix read by the difference section
results = {
    "vgg16": {
        "March": {
            "pred_csv":      r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_March\Results\vgg16\tree_classes\confidence_scores.csv",
            "confmat_csv":   r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_March\Results\vgg16\tree_classes\confusion_matrix.csv"
        },
        "July": {
            "pred_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\vgg16\confidence_scores.csv",
            "confmat_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\vgg16\benchmark_confusion_matrix.csv"
        },
        "October": {
            "pred_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\vgg16\confidence_scores.csv",
            "confmat_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\vgg16\analysis\confusion_matrix.csv"
        },
    },
    "ViT": {
        "March": {
            "pred_csv":      r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_March\Results\ViT\predictions.csv",
            # NOTE(review): this entry was an abbreviated placeholder
            # ("C:\…\Raw_March\..."), which is not a readable path. The prefix
            # is restored from the March ViT prediction path above - confirm
            # the file exists at this location.
            "confmat_csv":   r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_March\Results\ViT\analysis\confusion_matrix.csv"
        },
        "July": {
            "pred_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\ViT\predictions_output.csv",
            "confmat_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\ViT\confusion_matrix.csv"
        },
        "October": {
            "pred_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\ViT\predictions_output.csv",
            "confmat_csv": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\ViT\analysis\confusion_matrix.csv"
        },
    }
}


# Folder for every CSV table and PNG figure written by this cell.
# NOTE(review): the original value was an abbreviated placeholder
# ("C:\…\Results\comparison"), which would create a directory literally named
# "…". Restored to the comparison folder used by the July/October cell of this
# notebook - confirm this is the intended location.
output_dir = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\comparison"
os.makedirs(output_dir, exist_ok=True)

# Define the season order for consistent plotting
season_order = ["March", "July", "October"]
# Fixed colour per model key of `results`
model_palette = {"vgg16":"blue", "ViT":"green"}

# ─── 1. MACRO-F1 TABLE & BARPLOT ─────────────────────────────────────────────
# One record per (model, season), saved as CSV and drawn as grouped bars.
macro_f1 = []
for model_name, season_files in results.items():
    # The vgg16 and ViT exports name their label columns differently.
    true_col, pred_col = (
        ("true", "pred") if model_name == "vgg16" else ("true_label", "pred_label")
    )
    for season_name, files in season_files.items():
        preds = pd.read_csv(files["pred_csv"])
        # No `labels=` is passed, so the average is taken over whatever label
        # set f1_score derives from the data by default.
        score = f1_score(preds[true_col], preds[pred_col], average="macro")
        macro_f1.append(
            {"model": model_name, "season": season_name, "macro_f1": score}
        )

macro_df = pd.DataFrame(macro_f1)
macro_df.to_csv(os.path.join(output_dir, "macro_f1_comparison.csv"), index=False)

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(
    data=macro_df,
    x="season",
    y="macro_f1",
    hue="model",
    order=season_order,
    palette=model_palette,
    ax=ax,
)
ax.set_ylim(0, 1)
ax.set_ylabel("Macro F₁ Score")
ax.set_title("Macro F₁ by Model and Season")
fig.tight_layout()
fig.savefig(os.path.join(output_dir, "macro_f1_by_season.png"))
plt.close(fig)

# ─── 2. PER-CLASS F₁ HEATMAP ────────────────────────────────────────────────
# For every model: a classes x seasons table of per-class F1, saved as CSV and
# rendered as an annotated heatmap.
for model, seasons in results.items():
    # The vgg16 and ViT exports name their label columns differently.
    true_col, pred_col = (
        ("true", "pred") if model == "vgg16" else ("true_label", "pred_label")
    )
    f1_matrix = {}
    for season, paths in seasons.items():
        df = pd.read_csv(paths["pred_csv"])
        y_true, y_pred = df[true_col], df[pred_col]

        season_classes = sorted(y_true.unique())
        # Key every score by its class label. Sorting alone does not give the
        # same ordering each season when the class sets differ: positional
        # lists then either fail to combine (length mismatch) or end up
        # labelled with the last season's classes.
        f1_matrix[season] = pd.Series(
            # A single-label "macro" average is that class's own F1.
            [f1_score(y_true, y_pred, labels=[c], average="macro") for c in season_classes],
            index=season_classes,
            dtype=float,
        )

    # Series are aligned on their index; a class that is absent from a season
    # becomes NaN and is left blank in the heatmap.
    heat_df = pd.DataFrame(f1_matrix).reindex(columns=season_order)
    heat_df.to_csv(os.path.join(output_dir, f"{model}_per_class_f1.csv"))

    plt.figure(figsize=(7,5))
    sns.heatmap(
        heat_df, 
        annot=True, 
        fmt=".2f", 
        cmap="YlGnBu", 
        vmin=0, vmax=1, 
        cbar_kws={"label":"F₁ Score"}
    )
    plt.title(f"{model} Per-Class F₁ Across Seasons")
    plt.ylabel("Class")
    plt.xlabel("Season")
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f"{model}_per_class_f1_heatmap.png"))
    plt.close()

# ─── 3. ΔF₁ VS JULY BAR CHART (for March & October) ─────────────────────────
# Per-class F1 of March and of October, each expressed relative to July.
delta = []
for model_name in results:
    # Per-class table written by the heatmap section: classes as index, one
    # column per season.
    per_class = pd.read_csv(
        os.path.join(output_dir, f"{model_name}_per_class_f1.csv"),
        index_col=0,
    )
    for season_name in ["March","October"]:
        change = per_class[season_name] - per_class["July"]
        delta.extend(
            {"model": model_name, "class": cls, "season": season_name, "delta_f1": val}
            for cls, val in change.items()
        )

delta_df = pd.DataFrame(delta)
delta_df.to_csv(os.path.join(output_dir, "delta_f1_comparison.csv"), index=False)

# NOTE(review): bars are grouped by season only, so the rows of both models
# for a class are combined into one bar by barplot's default aggregation -
# confirm that pooling the models is intended.
fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(
    data=delta_df,
    x="class",
    y="delta_f1",
    hue="season",
    palette=["orange","purple"],
    order=sorted(delta_df["class"].unique()),
    ax=ax,
)
# Zero line separates gains from losses relative to July.
ax.axhline(0, color="black", linewidth=0.8)
ax.set_ylabel("Δ F₁ (Season − July)")
plt.xticks(rotation=45, ha="right")
ax.set_title("Change in Per-Class F₁ vs July")
fig.tight_layout()
fig.savefig(os.path.join(output_dir, "delta_f1_per_class.png"))
plt.close(fig)

# ─── 4. CONFUSION MATRIX DIFFERENCES ────────────────────────────────────────
# For every model: March and October confusion matrices, each minus the July
# matrix, saved as CSV and drawn as a diverging heatmap centred on zero.
for model, seasons in results.items():
    cm_j = pd.read_csv(seasons["July"]["confmat_csv"], index_col=0)
    for season in ["March","October"]:
        cm_s = pd.read_csv(seasons[season]["confmat_csv"], index_col=0)

        # Put both matrices on one common label grid before subtracting. A
        # label missing from one season is treated as a row/column of zero
        # counts; without this, pandas fills the non-overlapping cells with
        # NaN, the result turns float, and the integer annotation format
        # below fails.
        row_labels = cm_j.index.union(cm_s.index)
        col_labels = cm_j.columns.union(cm_s.columns)
        july_aligned = cm_j.reindex(index=row_labels, columns=col_labels, fill_value=0)
        season_aligned = cm_s.reindex(index=row_labels, columns=col_labels, fill_value=0)

        diff = season_aligned - july_aligned
        diff.to_csv(os.path.join(output_dir, f"{model}_confmat_diff_{season}_vs_July.csv"))

        # "d" can only format integers; fall back to two decimals if the
        # stored matrices hold non-integer values.
        all_integer = all(pd.api.types.is_integer_dtype(dt) for dt in diff.dtypes)

        plt.figure(figsize=(6,5))
        sns.heatmap(
            diff, 
            annot=True, 
            center=0, 
            cmap="vlag", 
            fmt="d" if all_integer else ".2f",
            cbar_kws={"label": "Δ count"}
        )
        plt.title(f"{model} Confusion-Matrix Δ: {season} vs July")
        plt.ylabel("True")
        plt.xlabel("Predicted")
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, f"{model}_confmat_diff_{season}_vs_July.png"))
        plt.close()
