In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from roboflow import Roboflow
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [4]:
# Read the full July result table, remove the label/score columns and write
# the remaining columns to a separate "no labels" metadata CSV.
csv_path = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\ViT\metadata_full_July_updated.csv"
df = pd.read_csv(csv_path).drop(columns=["pred_label", "true_label", "confidence"])
df.to_csv(
    r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\ViT\metadata_july_no_labels.csv",
    index=False,
)

## predictions for every month 

In [None]:
# ─── 0. CONFIG ────────────────────────────────────────────────────────────────
# The Roboflow API key is a secret and must not be stored in the notebook.
# It is read from the ROBOFLOW_API_KEY environment variable instead.
# NOTE(review): the key that used to be hard-coded here should be considered
# leaked and be rotated in the Roboflow account.
API_KEY = os.environ.get("ROBOFLOW_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "ROBOFLOW_API_KEY is not set; export it before running this cell."
    )
PROJECT_NAME  = "all_seasons_training"
MODEL_VERSION = 1

# Per month: metadata CSV ("meta"), folder with the test patches grouped in one
# sub-folder per class ("patch_dir") and the CSV the predictions are written to
# ("out_csv").
months = {
    "March": {
        "meta": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_March\metadata_March_ROI.csv" ,
        "patch_dir": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\test_patch_march_resized",
        "out_csv":  r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_march_output.csv"
    },
    "July": {
        "meta": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\ViT\metadata_july_no_labels.csv",
        "patch_dir": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\test_patch_july_resized",
        "out_csv":  r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_july_output.csv"
    },
    "October": {
        "meta": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\metadata_October_ROI.csv",
        "patch_dir": r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\test_patch_october_resized",
        "out_csv":  r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_october_output.csv"
    }
}

# Initialise the Roboflow client once; `model` is reused for every month.
rf      = Roboflow(api_key=API_KEY)
project = rf.workspace().project(PROJECT_NAME)
model   = project.version(MODEL_VERSION).model

# ─── 1. LOOP OVER MONTHS ───────────────────────────────────────────────────────
# For every configured month: index the patch files on disk, look up each
# metadata row in that index, ask the model for a prediction and write one
# result row per metadata row to the month's output CSV.
for month, cfg in months.items():
    meta_csv, test_dir, output_csv = cfg["meta"], cfg["patch_dir"], cfg["out_csv"]

    # 1a. patch_map: original patch filename -> (path on disk, class folder name)
    patch_map = {}
    for cls in os.listdir(test_dir):
        cls_dir = os.path.join(test_dir, cls)
        if os.path.isdir(cls_dir):
            for img in os.listdir(cls_dir):
                # cut everything from the last "_jpg" onwards and restore ".jpg"
                stem = os.path.splitext(img)[0]
                name = stem.rsplit("_jpg", 1)[0] + ".jpg"
                patch_map[name] = (os.path.join(cls_dir, img), cls)

    # 1b. metadata rows define which patches get a result row
    df_meta = pd.read_csv(meta_csv)

    # 1c. one record per metadata row; fields stay None when the patch is not
    #     found on disk or the model returns no prediction
    results = []
    for _, row in df_meta.iterrows():
        fname = row["patch_filename"]
        record = {
            "patch_filename": fname,
            "true_label": None,
            "pred_label": None,
            "confidence": None
        }

        entry = patch_map.get(fname)
        if entry is not None:
            path, true_lbl = entry
            record["true_label"] = true_lbl
            pred = model.predict(path).json().get("predictions", [])
            if pred:
                record["pred_label"] = pred[0]["top"]
                record["confidence"] = pred[0]["confidence"]

        results.append(record)

    # 1d. persist this month's predictions
    pd.DataFrame(results).to_csv(output_csv, index=False)
    print(f"[{month}] saved {len(results)} predictions → {output_csv}")

loading Roboflow workspace...
loading Roboflow project...
[March] saved 1770 predictions → C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_march_output.csv
[July] saved 2200 predictions → C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_july_output.csv
[October] saved 2100 predictions → C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_october_output.csv


## Check for true-label mismatches

In [9]:
import os
import pandas as pd

# ─── 0. CONFIGURATION ─────────────────────────────────────────────────────────
# Per month: the folder with test patches (one sub-folder per class) and the
# prediction CSV whose true labels are checked against that folder layout.
months = {
    "March": {
        "test_dir":  r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\test_patch_march_resized",
        "pred_csv":  r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_march_output.csv"
    },
    "July": {
        "test_dir":  r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\test_patch_july_resized",
        "pred_csv":  r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_july_output.csv"
    },
    "October": {
        "test_dir":  r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\test_patch_october_resized",
        "pred_csv":  r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_october_output.csv"
    }
}

# ─── 1. LOOP OVER MONTHS ────────────────────────────────────────────────────────
for month, cfg in months.items():
    test_dir  = cfg["test_dir"]
    pred_csv  = cfg["pred_csv"]
    df_preds  = pd.read_csv(pred_csv)

    # 1a. Build folder-derived true_map: original patch filename -> class folder
    true_map = {}
    for cls in os.listdir(test_dir):
        cls_dir = os.path.join(test_dir, cls)
        if not os.path.isdir(cls_dir):
            continue
        for fn in os.listdir(cls_dir):
            # Every extension carries its leading dot; without it a name that
            # merely ends in the letters "jpeg"/"png" would pass the filter.
            if not fn.lower().endswith((".jpg", ".jpeg", ".png")):
                continue
            # Strip the suffix at the LAST "_jpg", exactly like the prediction
            # cell does when it creates patch_filename, so both cells always
            # derive the same key for the same file.
            base = os.path.splitext(fn)[0].rsplit("_jpg", 1)[0] + ".jpg"
            true_map[base] = cls

    # 1b. Determine which column holds the original true labels
    if "true_label" in df_preds.columns:
        true_col = "true_label"
    elif "true" in df_preds.columns:
        true_col = "true"
    else:
        raise ValueError(f"No true‐label column in {pred_csv}")

    # 1c. Map and compare.
    # Rows whose filename is not in true_map get NaN as derived_true; NaN never
    # compares equal, so such rows are always reported as mismatches (also when
    # the stored true label is missing as well).
    df_preds["derived_true"] = df_preds["patch_filename"].map(true_map)
    mismatches = df_preds[df_preds["derived_true"] != df_preds[true_col]]

    # 1d. Report
    total    = len(df_preds)
    bad      = len(mismatches)
    pct_bad  = bad / total * 100 if total else 0
    print(f"\n--- {month} ---")
    print(f"Total patches: {total}")
    print(f"Mismatches   : {bad} ({pct_bad:.2f} %)")

    if bad:
        print("\nFirst 10 mismatches:")
        print(mismatches[["patch_filename", true_col, "derived_true"]].head(10))

        # 1e. Save for deeper inspection
        out_dir = os.path.join(os.path.dirname(pred_csv), f"debug_mismatches_{month}")
        os.makedirs(out_dir, exist_ok=True)
        mismatches.to_csv(
            os.path.join(out_dir, f"{month}_true_label_mismatches.csv"),
            index=False
        )
        print(f"Mismatch details saved to: {out_dir}")



--- March ---
Total patches: 1770
Mismatches   : 0 (0.00 %)

--- July ---
Total patches: 2200
Mismatches   : 0 (0.00 %)

--- October ---
Total patches: 2100
Mismatches   : 0 (0.00 %)


## missing predictions

In [10]:
# ─── 0. CONFIGURATION ─────────────────────────────────────────────────────────
# Prediction file for March; it is read here and overwritten in step 5.
PREDICTIONS_CSV = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_march_output.csv"

# ─── 1. LOAD PREDICTIONS ───────────────────────────────────────────────────────
df = pd.read_csv(PREDICTIONS_CSV)

# ─── 2. FLAG ROWS WITHOUT A PREDICTION ────────────────────────────────────────
# A prediction counts as missing when it is NaN, empty, or whitespace only.
pred_as_text = df['pred_label'].astype(str).str.strip()
mask_missing = df['pred_label'].isna() | pred_as_text.eq("")

print(f"Found {mask_missing.sum()} rows with missing pred_label out of {len(df)}")

# ─── 3. MEAN CONFIDENCE PER TRUE CLASS ────────────────────────────────────────
# Rows without a confidence value are left out of the average.
mean_conf = (
    df.dropna(subset=['confidence'])
      .groupby('true_label')['confidence']
      .mean()
)

# ─── 4. FILL THE FLAGGED ROWS ─────────────────────────────────────────────────
# NOTE(review): the flagged rows receive their own true label as prediction, so
# every metric that compares pred_label with true_label counts them as correct.
# Confirm that this is the intended treatment.
missing_true = df.loc[mask_missing, 'true_label']
df.loc[mask_missing, 'pred_label'] = missing_true
# confidence becomes the class mean computed in step 3
df.loc[mask_missing, 'confidence'] = missing_true.map(mean_conf)

# ─── 5. WRITE BACK TO THE SAME FILE ───────────────────────────────────────────
df.to_csv(PREDICTIONS_CSV, index=False)
print(f"Updated CSV saved to {PREDICTIONS_CSV}")

# ─── 6. SHOW UP TO TEN OF THE FILLED ROWS ─────────────────────────────────────
print("\nRows that were imputed:")
print(df.loc[mask_missing].head(10))


Found 6 rows with missing pred_label out of 1770
Updated CSV saved to C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_march_output.csv

Rows that were imputed:
               patch_filename      true_label      pred_label  confidence
200    F13_0257_patch_844.jpg  Water-starwort  Water-starwort    0.771852
270   F13_0257_patch_1131.jpg           Other           Other    0.921741
369    F15_0170_patch_477.jpg     Clear Water     Clear Water    0.936379
465   F15_0170_patch_1133.jpg     Clear Water     Clear Water    0.936379
1552    F9_0139_patch_428.jpg           Other           Other    0.921741
1682    F9_0139_patch_918.jpg           Other           Other    0.921741


In [11]:
# ─── 0. CONFIGURATION ─────────────────────────────────────────────────────────
# Prediction file for July; it is read here and overwritten in step 5.
PREDICTIONS_CSV = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_july_output.csv"

# ─── 1. LOAD PREDICTIONS ───────────────────────────────────────────────────────
df = pd.read_csv(PREDICTIONS_CSV)

# ─── 2. FLAG ROWS WITHOUT A PREDICTION ────────────────────────────────────────
# A prediction counts as missing when it is NaN, empty, or whitespace only.
pred_as_text = df['pred_label'].astype(str).str.strip()
mask_missing = df['pred_label'].isna() | pred_as_text.eq("")

print(f"Found {mask_missing.sum()} rows with missing pred_label out of {len(df)}")

# ─── 3. MEAN CONFIDENCE PER TRUE CLASS ────────────────────────────────────────
# Rows without a confidence value are left out of the average.
mean_conf = (
    df.dropna(subset=['confidence'])
      .groupby('true_label')['confidence']
      .mean()
)

# ─── 4. FILL THE FLAGGED ROWS ─────────────────────────────────────────────────
# NOTE(review): the flagged rows receive their own true label as prediction, so
# every metric that compares pred_label with true_label counts them as correct.
# Confirm that this is the intended treatment.
missing_true = df.loc[mask_missing, 'true_label']
df.loc[mask_missing, 'pred_label'] = missing_true
# confidence becomes the class mean computed in step 3
df.loc[mask_missing, 'confidence'] = missing_true.map(mean_conf)

# ─── 5. WRITE BACK TO THE SAME FILE ───────────────────────────────────────────
df.to_csv(PREDICTIONS_CSV, index=False)
print(f"Updated CSV saved to {PREDICTIONS_CSV}")

# ─── 6. SHOW UP TO TEN OF THE FILLED ROWS ─────────────────────────────────────
print("\nRows that were imputed:")
print(df.loc[mask_missing].head(10))


Found 6 rows with missing pred_label out of 2200
Updated CSV saved to C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_july_output.csv

Rows that were imputed:
             patch_filename      true_label      pred_label  confidence
300   F9_0064_patch_150.jpg     Clear Water     Clear Water    0.866513
389   F9_0064_patch_239.jpg           Other           Other    0.913537
460   F9_0064_patch_310.jpg     Clear Water     Clear Water    0.866513
511   F13_0092_patch_31.jpg     Common reed     Common reed    0.957247
1133  F16_0172_patch_86.jpg  Water-starwort  Water-starwort    0.877829
1221   F18_0164_patch_3.jpg  Water-starwort  Water-starwort    0.877829


In [12]:
# ─── 0. CONFIGURATION ─────────────────────────────────────────────────────────
# Prediction file for October; it is read here and overwritten in step 5.
PREDICTIONS_CSV = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_october_output.csv"

# ─── 1. LOAD PREDICTIONS ───────────────────────────────────────────────────────
df = pd.read_csv(PREDICTIONS_CSV)

# ─── 2. FLAG ROWS WITHOUT A PREDICTION ────────────────────────────────────────
# A prediction counts as missing when it is NaN, empty, or whitespace only.
pred_as_text = df['pred_label'].astype(str).str.strip()
mask_missing = df['pred_label'].isna() | pred_as_text.eq("")

print(f"Found {mask_missing.sum()} rows with missing pred_label out of {len(df)}")

# ─── 3. MEAN CONFIDENCE PER TRUE CLASS ────────────────────────────────────────
# Rows without a confidence value are left out of the average.
mean_conf = (
    df.dropna(subset=['confidence'])
      .groupby('true_label')['confidence']
      .mean()
)

# ─── 4. FILL THE FLAGGED ROWS ─────────────────────────────────────────────────
# NOTE(review): the flagged rows receive their own true label as prediction, so
# every metric that compares pred_label with true_label counts them as correct.
# Confirm that this is the intended treatment.
missing_true = df.loc[mask_missing, 'true_label']
df.loc[mask_missing, 'pred_label'] = missing_true
# confidence becomes the class mean computed in step 3
df.loc[mask_missing, 'confidence'] = missing_true.map(mean_conf)

# ─── 5. WRITE BACK TO THE SAME FILE ───────────────────────────────────────────
df.to_csv(PREDICTIONS_CSV, index=False)
print(f"Updated CSV saved to {PREDICTIONS_CSV}")

# ─── 6. SHOW UP TO TEN OF THE FILLED ROWS ─────────────────────────────────────
print("\nRows that were imputed:")
print(df.loc[mask_missing].head(10))


Found 9 rows with missing pred_label out of 2100
Updated CSV saved to C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_october_output.csv

Rows that were imputed:
               patch_filename   true_label   pred_label  confidence
134    F13_0173_patch_600.jpg        Other        Other    0.921046
152    F13_0173_patch_682.jpg        Other        Other    0.921046
303    F15_0172_patch_188.jpg     Duckweed     Duckweed    0.910931
358    F15_0172_patch_515.jpg     Duckweed     Duckweed    0.910931
462   F15_0172_patch_1129.jpg     Duckweed     Duckweed    0.910931
483     F16_0124_patch_25.jpg     Duckweed     Duckweed    0.910931
953     F19_0107_patch_99.jpg  Clear Water  Clear Water    0.851190
993    F19_0107_patch_263.jpg  Clear Water  Clear Water    0.851190
1516   F21_0022_patch_965.jpg  Clear Water  Clear Water    0.851190


## Brightness measure used for the sun/shadow decision

In [17]:
from PIL import Image
import numpy as np

def compute_brightness_mean(patch_path: str) -> float:
    """
    Return the mean grayscale brightness of one image patch.

    Parameters
    ----------
    patch_path : str
        Path of the image file to read.

    Returns
    -------
    float
        Mean pixel value of the image after conversion to mode "L"
        (8-bit grayscale, so the value lies in 0–255).
    """
    # The context manager closes the underlying file as soon as the pixel data
    # has been copied, instead of leaving the handle open until the image
    # object happens to be garbage-collected.
    with Image.open(patch_path) as img:
        arr = np.array(img.convert("L"), dtype=float)
    return float(arr.mean())

if __name__ == "__main__":
    # Demo: print the mean brightness of a single March "Clear Water" patch.
    example = (
        r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\test_patch_march_resized\Clear Water\F2_0132_patch_473_jpg.rf.59410f57860098119fe03d60cf1c9570.jpg"
    )
    mean_b = compute_brightness_mean(example)
    print(f"Mean brightness of {example}: {mean_b:.1f}")


Mean brightness of C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\test_patch_march_resized\Clear Water\F2_0132_patch_473_jpg.rf.59410f57860098119fe03d60cf1c9570.jpg: 69.0


## merge predictions with other metadata

In [None]:
# metadata files for each month
# These three lines are bare string expressions kept as a reference list: none
# of them is assigned to a name, so the cell does not change any variable.
r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_March\metadata_March_ROI.csv"
r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_July\All_results\ViT\metadata_july_no_labels.csv"
r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\metadata_October_ROI.csv"

In [27]:
# merge_and_label.py

import os
import pandas as pd

# ─── 0. CONFIG ───────────────────────────────────────────────────────────────
# Per-month settings: month name, the two input CSVs and the brightness
# thresholds. The output path is derived from MONTH.
MONTH = "October"
METADATA_CSV = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\Roboflow\Raw_October\metadata_October_ROI.csv"
PREDICTIONS_CSV = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\pred_october_output.csv"
OUTPUT_MERGED_CSV = rf"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\merged_{MONTH}.csv"

# class name -> brightness threshold used for this month
BRIGHTNESS_THRESHOLDS = {
    "Clear Water": 55,
    "Common reed": 60,
    "Duckweed": 70,
    "Other": 50,
    "Water-starwort": 50,
}

# ─── 1. LOAD ─────────────────────────────────────────────────────────────────
df_meta = pd.read_csv(METADATA_CSV)
df_pred = pd.read_csv(PREDICTIONS_CSV)

# ─── 2. MERGE ────────────────────────────────────────────────────────────────
# Left join on the patch filename keeps every metadata row; validate makes
# pandas raise when a filename occurs more than once on either side.
pred_columns = df_pred[['patch_filename', 'true_label', 'pred_label', 'confidence']]
df = df_meta.merge(
    pred_columns,
    on='patch_filename',
    how='left',
    validate='one_to_one',
)

# ─── 3. MERGE CONTROL ────────────────────────────────────────────────────────
# The merged frame must have exactly as many rows as the metadata.
rows_differ = len(df) != len(df_meta)
if rows_differ:
    raise RuntimeError(f"[{MONTH}] Merge row‐count mismatch: "
                       f"metadata={len(df_meta)} vs merged={len(df)}")
n_missing = df['pred_label'].isna().sum()
print(f"[{MONTH}] {len(df)} rows after merge, {n_missing} predictions missing")

# ─── 4. ASSIGN LIGHT CONDITION ───────────────────────────────────────────────
def assign_light(row, thresholds=None):
    """
    Classify one patch as 'shadow' or 'sun' from its mean brightness.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'true_label' and 'brightness_mean'.
    thresholds : dict, optional
        Maps class name to brightness threshold. When omitted, the
        module-level BRIGHTNESS_THRESHOLDS is used.

    Returns
    -------
    'shadow' when brightness_mean is below the threshold of the row's class,
    'sun' otherwise, and pd.NA when the class has no threshold or the
    brightness value is missing.
    """
    if thresholds is None:
        thresholds = BRIGHTNESS_THRESHOLDS
    thr = thresholds.get(row['true_label'])
    if thr is None:
        return pd.NA
    brightness = row['brightness_mean']
    # A missing brightness must not be labelled: NaN < thr evaluates to False,
    # which would otherwise silently put the row in the 'sun' group.
    if pd.isna(brightness):
        return pd.NA
    return 'shadow' if brightness < thr else 'sun'

# Label every merged row with its light condition.
light_per_row = df.apply(assign_light, axis=1)
df['light_condition'] = light_per_row

# ─── 5. SAVE MERGED CSV ──────────────────────────────────────────────────────
# Create the target folder if needed, then write the merged table.
out_folder = os.path.dirname(OUTPUT_MERGED_CSV)
os.makedirs(out_folder, exist_ok=True)
df.to_csv(OUTPUT_MERGED_CSV, index=False)
print(f"[{MONTH}] Merged file with light_condition saved to:")
print(f"    {OUTPUT_MERGED_CSV}")


[October] 2100 rows after merge, 0 predictions missing
[October] Merged file with light_condition saved to:
    C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\merged_October.csv


## analysis

In [28]:
# ─── 0. CONFIG ───────────────────────────────────────────────────────────────
MERGED_CSV = r"C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\merged_October.csv"       # choose your month
OUT_DIR    = MERGED_CSV.replace(".csv", "_analysis")  # e.g. merged_July_analysis
os.makedirs(OUT_DIR, exist_ok=True)

# ─── 1. LOAD & SETUP ─────────────────────────────────────────────────────────
df = pd.read_csv(MERGED_CSV)
# Boolean per row: did the prediction match the true label?
df['correct'] = (df['true_label'] == df['pred_label'])

# ─── 2. OVERALL METRICS & CLASSIFICATION REPORT ──────────────────────────────
acc       = accuracy_score(df['true_label'], df['pred_label'])
mac_f1    = f1_score(df['true_label'], df['pred_label'], average='macro')
wei_f1    = f1_score(df['true_label'], df['pred_label'], average='weighted')
report    = classification_report(df['true_label'], df['pred_label'], digits=4)

with open(os.path.join(OUT_DIR, "overall_metrics.txt"), "w") as f:
    f.write(f"Accuracy     : {acc:.4f}\n")
    f.write(f"Macro F1     : {mac_f1:.4f}\n")
    f.write(f"Weighted F1  : {wei_f1:.4f}\n\n")
    f.write("Classification Report:\n")
    f.write(report)

# ─── 3. CONFUSION MATRIX ──────────────────────────────────────────────────────
# Rows/columns follow the sorted set of true labels.
labels = sorted(df['true_label'].unique())
cm = confusion_matrix(df['true_label'], df['pred_label'], labels=labels)
cm_df = pd.DataFrame(cm, index=labels, columns=labels)
cm_df.to_csv(os.path.join(OUT_DIR, "confusion_matrix.csv"))

plt.figure(figsize=(8,6))
sns.heatmap(cm_df, annot=True, fmt="d", cmap="Blues")
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "confusion_matrix.png"))
plt.close()

# ─── 4. SUN vs SHADOW METRICS & PLOT ─────────────────────────────────────────
ss = []
for cond in ['sun','shadow']:
    subset = df[df['light_condition'] == cond]
    if subset.empty:
        # No patch falls in this condition: record the empty group with
        # undefined metrics instead of calling the scorers on empty input.
        ss.append((cond, 0, float('nan'), float('nan')))
        continue
    a = accuracy_score(subset['true_label'], subset['pred_label'])
    m = f1_score(subset['true_label'], subset['pred_label'], average='macro')
    ss.append((cond, len(subset), a, m))
ss_df = pd.DataFrame(ss, columns=['light_condition','n','accuracy','macro_f1'])
ss_df.to_csv(os.path.join(OUT_DIR, "sun_shadow_metrics.csv"), index=False)

plt.figure(figsize=(6,4))
# `palette` needs `hue`; passing it alone is deprecated in seaborn. Assigning
# the x variable to hue with legend=False gives the same figure, matching the
# boxplot call further down in this cell.
sns.barplot(
    data=ss_df, x='light_condition', y='macro_f1',
    hue='light_condition',
    palette={'sun':'gold','shadow':'gray'},
    legend=False
)
plt.ylim(0,1)
plt.title("Macro F₁: Sun vs Shadow")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "macro_f1_sun_vs_shadow.png"))
plt.close()

# ─── 5. BRIGHTNESS vs CORRECTNESS BOXPLOT ───────────────────────────────────
plt.figure(figsize=(6,4))
sns.boxplot(
    data=df,
    x='correct',
    y='brightness_mean',
    hue='correct',
    palette={True: 'lightgreen', False: 'salmon'},
    dodge=False,
    legend=False
)
plt.xlabel("Correct Prediction")
plt.ylabel("Brightness Mean")
plt.title("Brightness vs Correctness")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "brightness_vs_correctness.png"))
plt.close()

# ─── 6. PER-CLASS ERROR RATES ────────────────────────────────────────────────
# Error rate of a class = share of its true samples that got another label.
errors = []
for cls in labels:
    sub = df[df['true_label']==cls]
    err_rate = 100 * (1 - accuracy_score(sub['true_label'], sub['pred_label']))
    errors.append((cls, len(sub), err_rate))
err_df = pd.DataFrame(errors, columns=['class','n','error_pct'])
err_df.to_csv(os.path.join(OUT_DIR, "per_class_error_rates.csv"), index=False)

plt.figure(figsize=(8,5))
# Same hue/legend fix as for the sun-vs-shadow barplot above.
sns.barplot(
    data=err_df, x='class', y='error_pct',
    hue='class', palette="viridis", legend=False
)
plt.xticks(rotation=45, ha='right')
plt.ylabel("Error Rate (%)")
plt.title("Per-Class Misclassification Rate")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "per_class_error_rates.png"))
plt.close()

print(f"Analysis complete → results in {OUT_DIR}")




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.



Analysis complete → results in C:\Users\Sander\OneDrive - UGent\Semester_2\Masterproef\Thesis_ML\all_seasons_model\test_data_pred\merged_October_analysis




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

