## Baseline (July-only) model vs. all-seasons model

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
import os

# ─── 0. CONFIG ─────────────────────────────────────────────────────────────────
# Prediction CSVs must have columns: patch_filename, true_label, pred_label, confidence
# (the metric code in this cell only reads `true_label` and `pred_label`).
# `models` maps a model display name -> {season name -> path of the CSV holding
# that model's predictions for the season}. The display names double as the
# keys of `palette` and as the legend / x-axis labels in the figures.
models = {
    "July-only model": {
        "March":   r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_March\Results\ViT\predictions.csv",
        "July":    r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\ViT\metadata_full_July_updated.csv",
        "October": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\Results\ViT\metadata_with_predictions.csv",
    },
    "All-seasons model": {
        "March":   r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\merged_March.csv",
        "July":    r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\merged_July.csv",
        "October": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\merged_October.csv",
    }
}

# Folder that receives the PNG figures written by this cell; created on demand.
OUT_DIR = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\baseline_vs_all_season"
os.makedirs(OUT_DIR, exist_ok=True)

# Cool blue-gray palette: one fixed colour per model so every figure uses the
# same colour for the same model.
palette = {
    "July-only model":   "#97a6c4",  # light blue-gray
    "All-seasons model":  "#384860",  # dark slate blue
}

# ─── 1. MACRO-F1 PER SEASON ───────────────────────────────────────────────────
def _macro_f1_from_csv(csv_path):
    """Read one prediction CSV and return the macro-averaged F1 of its labels."""
    predictions = pd.read_csv(csv_path)
    return f1_score(predictions["true_label"], predictions["pred_label"], average="macro")


# One record per (model, season) pair, in the iteration order of `models`.
season_rows = [
    {"Model": model_name, "Season": season, "Macro F1": _macro_f1_from_csv(csv_path)}
    for model_name, months in models.items()
    for season, csv_path in months.items()
]
season_df = pd.DataFrame(season_rows)

# ─── 2. OVERALL MACRO-F1 (ALL DATA COMBINED) ─────────────────────────────────
# For each model, pool the label columns of all its season CSVs and score the
# pooled predictions once.
overall_rows = []
for model_name, months in models.items():
    label_frames = []
    for csv_path in months.values():
        season_preds = pd.read_csv(csv_path)
        label_frames.append(season_preds[["true_label", "pred_label"]])
    pooled = pd.concat(label_frames, ignore_index=True)
    overall_rows.append(
        {
            "Model": model_name,
            "Macro F1": f1_score(pooled["true_label"], pooled["pred_label"], average="macro"),
        }
    )
overall_df = pd.DataFrame(overall_rows)

# ─── 3. ERROR RATE PER CLASS (ALL DATA COMBINED) ──────────────────────────────
# For each model: pool all seasons, then for every true class report the
# percentage of its samples whose prediction differs from the true label.
error_rows = []
for model_name, months in models.items():
    df_all = pd.concat(
        [pd.read_csv(csv_path) for csv_path in months.values()],
        ignore_index=True,
    )
    for cls in sorted(df_all["true_label"].unique()):
        belongs_to_cls = df_all["true_label"] == cls
        sub = df_all.loc[belongs_to_cls]
        class_accuracy = accuracy_score(sub["true_label"], sub["pred_label"])
        error_rows.append(
            {
                "Model": model_name,
                "Class": cls,
                "Error Rate (%)": 100 * (1 - class_accuracy),
            }
        )
error_df = pd.DataFrame(error_rows)

# ─── 4. PLOTTING ──────────────────────────────────────────────────────────────
sns.set_theme(style="whitegrid")

# 4a. Macro-F1 by Season — one group of bars per season, one bar per model
fig, ax = plt.subplots(figsize=(7, 4))
sns.barplot(
    data=season_df,
    x="Season",
    y="Macro F1",
    hue="Model",
    palette=palette,
    ax=ax,
)
ax.set_ylim(0, 1)  # F1 lives in [0, 1]; a fixed range keeps figures comparable
ax.set_ylabel("Macro F1")
ax.set_title("Macro F1 by Season")
fig.tight_layout()
fig.savefig(os.path.join(OUT_DIR, "macro_f1_by_season.png"), dpi=300)
plt.close(fig)

# 4b. Overall Macro-F1 — one bar per model, coloured with the shared model palette.
# seaborn >= 0.13 deprecates `palette` without `hue` (removed in v0.14): assign
# the x variable to `hue` and switch the redundant legend off to get the same
# figure without the warning.
plt.figure(figsize=(4,4))
sns.barplot(
    data=overall_df, x="Model", y="Macro F1",
    hue="Model", legend=False,
    palette=palette, order=["July-only model","All-seasons model"]
)
plt.ylim(0,1)
plt.ylabel("Macro F1")
plt.title("Overall Macro F1")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "macro_f1_overall.png"), dpi = 300)
plt.close()

# 4c. Error Rate per Class (Combined) — one group per class, one bar per model
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    data=error_df,
    x="Class",
    y="Error Rate (%)",
    hue="Model",
    palette=palette,
    ax=ax,
)
# Slant the class names so long labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
ax.set_ylabel("Error Rate (%)")
ax.set_title("Error Rate per Class (All Months Combined)")
fig.tight_layout()
fig.savefig(os.path.join(OUT_DIR, "error_rate_per_class_combined.png"), dpi=300)
plt.close(fig)

print("Done! Outputs written to:", OUT_DIR)



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(


Done! Outputs written to: C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\baseline_vs_all_season


## Combined CSV of all seasons

In [9]:
import os
import pandas as pd

# ─── POINT TO YOUR THREE MERGED FILES ────────────────────────────────────────
root = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred"
files = [
    os.path.join(root, f"merged_{season_name}.csv")
    for season_name in ("March", "July", "October")
]

# ─── 1) LOAD & CONCAT ───────────────────────────────────────────────────────
# Stack the per-season prediction tables into one frame with a fresh index.
dfs = list(map(pd.read_csv, files))
df_all = pd.concat(dfs, ignore_index=True)

# ─── 2) OPTIONAL: VERIFY ────────────────────────────────────────────────────
# Report the size and the per-season row counts, then insist that exactly the
# three expected season values occur in the `season` column.
season_counts = df_all["season"].value_counts().to_dict()
print("Total patches:", len(df_all))
print("Seasons present:", season_counts)
assert {"March", "July", "October"} == set(df_all["season"])

# ─── 3) SAVE THE COMBINED CSV ───────────────────────────────────────────────
out = os.path.join(root, "all_seasons_full_predictions.csv")
df_all.to_csv(out, index=False)
print("Saved combined file to:", out)



Total patches: 6070
Seasons present: {'July': 2200, 'October': 2100, 'March': 1770}
Saved combined file to: C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\all_seasons_full_predictions.csv


## All-seasons model: stand-alone analysis

In [5]:
from sklearn.metrics import f1_score

# ─── CONFIG ────────────────────────────────────────────────────────────────────
# Combined prediction table to analyse, and the folder that receives the figures.
INPUT_CSV = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\all_seasons_full_predictions.csv"
OUT_DIR = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Overleaf\Results\Full_trained_best_model"
os.makedirs(OUT_DIR, exist_ok=True)

sns.set_theme(style="whitegrid")
SUN_COLOR, SHADOW_COLOR = "#f1a226", "#707070"  # gold-orange / medium gray
BAR_COLOR = "#384860"  # dark slate blue (for other single-model bars)

# ─── LOAD DATA ────────────────────────────────────────────────────────────────
df = pd.read_csv(INPUT_CSV)

# Fail early when a column needed by the analyses below is absent.
required_columns = {"true_label", "pred_label", "brightness_mean", "light_condition"}
assert required_columns.issubset(df.columns)

# Row-wise flag: True where the prediction equals the ground-truth label.
df["correct"] = df["true_label"].eq(df["pred_label"])


# ─── 2) PER-CLASS MISCLASSIFICATION RATE ──────────────────────────────────────
# Share of wrongly predicted samples per true class, in percent.
# `~df["correct"]` is the boolean "misclassified" flag; its group mean is the
# misclassified fraction of each class.
misrate = (
    (~df["correct"])
      .groupby(df["true_label"])
      .mean()  # fraction misclassified
      .mul(100)
      .reset_index(name="misclass_pct")
)

plt.figure(figsize=(8,5))
# seaborn >= 0.13 deprecates `palette` without `hue` (removed in v0.14): colour
# by the x variable explicitly and suppress the redundant legend.
sns.barplot(
    data=misrate,
    x="true_label", y="misclass_pct",
    hue="true_label", legend=False,
    palette="viridis"
)
plt.xticks(rotation=45, ha="right")
plt.xlabel("Class")
plt.ylabel("Misclassification Rate (%)")
plt.title("Overall Misclassification Rate by Class")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "misclassification_rate_per_class.png"),
            dpi=300, bbox_inches="tight")
plt.close()

# ─── 3) BRIGHTNESS VS CORRECTNESS BOXPLOT ────────────────────────────────────
plt.figure(figsize=(6,4))
# seaborn >= 0.13 deprecates `palette` without `hue` (removed in v0.14), so the
# x variable is also passed as `hue` with the legend switched off.
# The boolean flag is cast to the strings "True"/"False" on a copy so that the
# string-keyed palette matches the hue levels; `order` pins the box positions
# (incorrect first, correct second) independently of row order.
box_data = df.assign(correct=df["correct"].astype(str))
sns.boxplot(
    data=box_data,
    x="correct", y="brightness_mean",
    hue="correct", order=["False", "True"],
    palette={"True": SUN_COLOR, "False": SHADOW_COLOR},
    legend=False
)
plt.xlabel("Correct Prediction")
plt.ylabel("Brightness (mean)")
plt.title("Brightness Distribution\nCorrect vs Incorrect")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "brightness_vs_correctness.png"),
            dpi=300, bbox_inches="tight")
plt.close()

# ─── 4) MACRO F1 SUN vs SHADOW ────────────────────────────────────────────────
# Macro-averaged F1 computed separately on the rows of each light condition.
# Each subset is filtered once and reused for both label columns.
scores = {}
for display_name, condition in (("Sun", "sun"), ("Shadow", "shadow")):
    subset = df[df["light_condition"] == condition]
    scores[display_name] = f1_score(
        subset["true_label"], subset["pred_label"], average="macro"
    )

plt.figure(figsize=(5,4))
# seaborn >= 0.13 deprecates `palette` without `hue` (removed in v0.14): colour
# by the x values explicitly and drop the redundant legend. A dict palette ties
# each colour to its condition by name rather than by position.
condition_names = list(scores.keys())
sns.barplot(
    x=condition_names,
    y=list(scores.values()),
    hue=condition_names, legend=False,
    palette={"Sun": SUN_COLOR, "Shadow": SHADOW_COLOR}
)
plt.ylim(0,1)
plt.ylabel("Macro F1")
plt.title("Macro F1: Sun vs Shadow")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "macro_f1_sun_vs_shadow.png"),
            dpi=300, bbox_inches="tight")
plt.close()

print("All plots saved to:", OUT_DIR)



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(


All plots saved to: C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Overleaf\Results\Full_trained_best_model



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(


In [2]:
import os
import shutil
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# ─── 0. CONFIG ────────────────────────────────────────────────────────────────
# INPUT_CSV: combined prediction table read below; this cell uses its
#            `true_label`, `pred_label`, `patch_filename` and `season` columns.
# OUT_DIR:   receives confusion_matrix.csv / confusion_matrix.png and the
#            `misclassified_patches` sub-folder; created if it does not exist.
INPUT_CSV = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\all_seasons_full_predictions.csv"
OUT_DIR   = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Overleaf\Results\Full_trained_best_model"
os.makedirs(OUT_DIR, exist_ok=True)

# where to find your resized test patches, per season:
# keys are looked up with the value of each row's `season` column, so they
# must match those values exactly.
TEST_PATCH_DIRS = {
    "March":   r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\test_patch_march_resized",
    "July":    r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\test_patch_july_resized",
    "October": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\test_patch_october_resized",
}

# Same plot theme as the other analysis cells.
sns.set_theme(style="whitegrid")

# ─── 1. LOAD & CM ─────────────────────────────────────────────────────────────
df = pd.read_csv(INPUT_CSV)
y_true = df["true_label"]
y_pred = df["pred_label"]
# Use every label that occurs as ground truth OR as prediction. With labels
# taken from `true_label` only, confusion_matrix() silently drops samples that
# were predicted as a class absent from the ground truth, hiding those errors.
# When all predicted classes also occur as true classes the result is unchanged.
labels = sorted(pd.concat([y_true, y_pred], ignore_index=True).unique())

# Rows = true class, columns = predicted class, both in `labels` order.
cm = confusion_matrix(y_true, y_pred, labels=labels)
cm_df = pd.DataFrame(cm, index=labels, columns=labels)

# save CSV
cm_csv = os.path.join(OUT_DIR, "confusion_matrix.csv")
cm_df.to_csv(cm_csv)

# plot heatmap (integer counts annotated in every cell)
plt.figure(figsize=(8,6))
sns.heatmap(
    cm_df,
    annot=True, fmt="d",
    cmap="Blues",
    vmin=0, vmax=cm_df.values.max(),
    cbar=True,
    square=True,
    linewidths=0.5, linecolor="white",
    xticklabels=labels, yticklabels=labels
)
plt.xlabel("Predicted", labelpad=10)
plt.ylabel("True", labelpad=10)
plt.title("Confusion Matrix (All Seasons)")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "confusion_matrix.png"), dpi=300, bbox_inches="tight")
plt.close()

print(f"Confusion matrix saved to:\n  {cm_csv}\n  {os.path.join(OUT_DIR,'confusion_matrix.png')}")

# ─── 2. EXPORT MISCLASSIFIED PATCHES ─────────────────────────────────────────
# Copy the image of every misclassified row into
#   <OUT_DIR>/misclassified_patches/<true>_to_<pred>/
# Each season directory is scanned once and indexed, instead of being walked
# again for every misclassified row.
base_out = os.path.join(OUT_DIR, "misclassified_patches")
os.makedirs(base_out, exist_ok=True)


def _normalised_patch_name(file_name):
    """Return the name used for matching against `patch_filename`.

    Drops the extension and everything from the first "_jpg" onwards
    (the Roboflow hash suffix), then appends ".jpg".
    """
    return os.path.splitext(file_name)[0].split("_jpg")[0] + ".jpg"


def _index_patch_dir(src_root):
    """Map normalised patch name -> path of the first matching file under `src_root`.

    `setdefault` keeps the first file met in os.walk order, which is the file
    a linear search with an early break would have picked.
    """
    index = {}
    for dir_path, _, file_names in os.walk(src_root):
        for file_name in file_names:
            index.setdefault(
                _normalised_patch_name(file_name),
                os.path.join(dir_path, file_name),
            )
    return index


patch_index = {}  # season -> {normalised name -> source path}, filled lazily
failed = 0
for _, row in df[df["true_label"] != df["pred_label"]].iterrows():
    fn        = row["patch_filename"]
    true_cls  = row["true_label"]
    pred_cls  = row["pred_label"]
    season    = row["season"]
    out_folder= os.path.join(base_out, f"{true_cls}_to_{pred_cls}")
    os.makedirs(out_folder, exist_ok=True)

    src_root = TEST_PATCH_DIRS.get(season)
    if src_root is None:
        # Unknown season: os.walk(None) would raise, so report and move on.
        failed += 1
        print(f"⚠️  No patch directory configured for season {season!r}; skipping {fn}")
        continue

    if season not in patch_index:
        patch_index[season] = _index_patch_dir(src_root)

    src_path = patch_index[season].get(fn)
    if src_path is None:
        failed += 1
        print(f"⚠️  Could not find patch {fn} in {season} directory")
        continue

    shutil.copy(src_path, out_folder)

print(f"\nDone! Misclassified patches copied into:\n  {base_out}")
if failed:
    print(f"{failed} patches were not found and therefore not copied.")


Confusion matrix saved to:
  C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Overleaf\Results\Full_trained_best_model\confusion_matrix.csv
  C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Overleaf\Results\Full_trained_best_model\confusion_matrix.png

Done! Misclassified patches copied into:
  C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Overleaf\Results\Full_trained_best_model\misclassified_patches
