In [1]:
import os, glob, pickle, warnings, json
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# ---------------------- CONFIG / PATHS ----------------------
# Folders to evaluate. Each folder's final path component is used as the
# ground-truth class name for every image found inside it.
AUG_CLASS_DIRS = [
    r"C:\Users\NXTWAVE\Downloads\Alzheimer’s Detection\archive\AugmentedAlzheimerDataset\VeryMildDemented",
    r"C:\Users\NXTWAVE\Downloads\Alzheimer’s Detection\archive\AugmentedAlzheimerDataset\NonDemented",
    # Exact duplicate of the entry above; collect_dataset() drops repeated paths,
    # so this line contributes no extra images.
    # NOTE(review): the recorded run output reports a ModerateDemented class with
    # zero support - confirm whether this entry was meant to be that folder.
    r"C:\Users\NXTWAVE\Downloads\Alzheimer’s Detection\archive\AugmentedAlzheimerDataset\NonDemented",
    r"C:\Users\NXTWAVE\Downloads\Alzheimer’s Detection\archive\AugmentedAlzheimerDataset\MildDemented",
]

# Directory the model / label-encoder files are read from and all reports are
# written to; created on import if it does not exist yet.
OUT_DIR = Path(r"C:\Users\NXTWAVE\Downloads\Alzheimer’s Detection")
OUT_DIR.mkdir(parents=True, exist_ok=True)

BATCH = 32             # images per prediction batch
IMG_SIZE = (224, 224)  # passed to build_tf_dataset as (height, width)
SEED = 42              # not referenced in the visible part of this script

# ---------------------- HELPERS (Unicode-safe IO) ----------------------
def cv2_imread_unicode(path: str) -> "np.ndarray | None":
    """Read an image whose path may contain non-ASCII characters.

    The file bytes are loaded with NumPy and decoded in memory with
    ``cv2.imdecode`` rather than passing the path to OpenCV directly.

    Args:
        path: Location of the image file.

    Returns:
        The decoded image as produced by ``cv2.imdecode`` with
        ``cv2.IMREAD_COLOR``, or ``None`` when the file is empty or its
        contents cannot be decoded.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    data = np.fromfile(path, dtype=np.uint8)
    if data.size == 0:
        # imdecode rejects an empty buffer with an exception instead of
        # returning None; report it the same way as any undecodable file so
        # callers only need a single `is None` check.
        return None
    return cv2.imdecode(data, cv2.IMREAD_COLOR)

def list_images(folder: str):
    """Return the sorted paths of the image files directly inside *folder*.

    Entries are matched by file extension, ignoring case, so ``a.JPG`` is
    found on case-sensitive file systems too. The folder name is used
    literally: characters such as ``[`` or ``*`` in it are not treated as
    wildcard syntax. Names starting with a dot are skipped, and
    sub-folders are not searched.

    Args:
        folder: Directory to scan.

    Returns:
        Sorted list of ``os.path.join(folder, name)`` strings; empty when the
        folder is missing, unreadable, or holds no matching entries.
    """
    exts = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")
    try:
        names = os.listdir(folder)
    except OSError:
        # Missing / unreadable folder: report "no images" rather than crash.
        return []
    files = [
        os.path.join(folder, name)
        for name in names
        if not name.startswith(".") and name.lower().endswith(exts)
    ]
    return sorted(files)

def collect_dataset(class_dirs):
    """Gather image paths and folder-derived labels from class folders.

    Args:
        class_dirs: Folder paths, one per class. Repeated entries are
            ignored after their first occurrence.

    Returns:
        A tuple ``(paths, labels, counts, dirs)`` where ``paths`` and
        ``labels`` are parallel lists (image path, folder name), ``counts``
        maps each folder name to the number of images found (0 for a missing
        folder), and ``dirs`` is the de-duplicated folder list in input order.
    """
    # dict keys keep first-seen order, which gives an order-preserving dedupe
    unique_dirs = list(dict.fromkeys(class_dirs))

    paths, labels, counts = [], [], {}
    for folder in unique_dirs:
        folder_path = Path(folder)
        class_name = folder_path.name
        if folder_path.exists():
            found = list_images(folder)
            counts[class_name] = len(found)
            paths.extend(found)
            labels.extend([class_name] * len(found))
        else:
            print(f"[WARN] Missing folder: {folder}")
            counts[class_name] = 0
    return paths, labels, counts, unique_dirs

def build_tf_dataset(file_paths, img_size, batch_size):
    """Build a batched ``tf.data`` pipeline that decodes images with OpenCV.

    Args:
        file_paths: Image file paths, in the order predictions are wanted.
        img_size: ``(height, width)`` every image is resized to.
        batch_size: Number of images per batch.

    Returns:
        A ``tf.data.Dataset`` yielding float32 batches of shape
        ``(batch, height, width, 3)`` in RGB order. Pixel values are left in
        the 0..255 range; no further scaling is applied here.

    Raises:
        FileNotFoundError: Raised inside the pipeline (when it is iterated)
            for a file that cannot be decoded.
    """
    height, width = img_size[0], img_size[1]

    def tf_load(path):
        def _py(p):
            p = p.numpy().decode("utf-8")
            img_bgr = cv2_imread_unicode(p)
            if img_bgr is None:
                raise FileNotFoundError(f"Could not read: {p}")
            # Pick the conversion from the decoded array's rank *before*
            # converting: BGR2RGB cannot be applied to a 2-D (single channel)
            # array, so checking afterwards would never be reached.
            if img_bgr.ndim == 2:
                img = cv2.cvtColor(img_bgr, cv2.COLOR_GRAY2RGB)
            else:
                img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
            # cv2.resize takes the target size as (width, height)
            img = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)
            return img.astype(np.float32)  # 0..255
        img = tf.py_function(_py, [path], Tout=tf.float32)
        # py_function loses static shape information; restore it for batching
        img.set_shape((height, width, 3))
        return img

    ds = tf.data.Dataset.from_tensor_slices(file_paths)
    ds = ds.map(tf_load, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

# ---------------------- LOAD MODEL & LABELS ----------------------
# Try best checkpoint, then h5
model_path_best = OUT_DIR / "neurowell_best.keras"
model_path_h5   = OUT_DIR / "neurowell_model.h5"
if model_path_best.exists():
    model = tf.keras.models.load_model(model_path_best, compile=False)
    print(f"[INFO] Loaded model: {model_path_best}")
elif model_path_h5.exists():
    model = tf.keras.models.load_model(model_path_h5, compile=False)
    print(f"[INFO] Loaded model: {model_path_h5}")
else:
    raise FileNotFoundError("No model found in OUT_DIR (neurowell_best.keras or neurowell_model.h5).")

# Width of the model's last output axis = number of classes it predicts
num_classes = int(model.output_shape[-1])

# Load label encoder if present (preferred for correct class order)
le_path = OUT_DIR / "label_encoder.pkl"
label_order = None
if le_path.exists():
    # NOTE(security): pickle.load can execute arbitrary code; only use a
    # label_encoder.pkl that was produced by a trusted training run.
    with open(le_path, "rb") as f:
        le_obj = pickle.load(f)
    # Accept both a plain dict carrying a "classes_" key and an object that
    # exposes a `classes_` attribute (e.g. a pickled encoder instance), so a
    # non-dict pickle no longer fails on `.get`.
    if isinstance(le_obj, dict):
        raw_classes = le_obj.get("classes_", [])
    else:
        raw_classes = getattr(le_obj, "classes_", [])
    label_order = [str(c) for c in raw_classes]
    if len(label_order) != num_classes:
        print(f"[WARN] Label encoder classes ({len(label_order)}) "
              f"!= model outputs ({num_classes}). Will fallback if needed.")
else:
    print("[WARN] label_encoder.pkl not found — will infer labels from folder names.")

# ---------------------- BUILD FILE LIST ----------------------
file_paths, true_labels, class_counts, used_dirs = collect_dataset(AUG_CLASS_DIRS)
if not file_paths:
    raise FileNotFoundError("No images found in the provided Augmented folders.")

# Keep the encoder's class order only when it matches the model's output
# width; otherwise derive a deterministic (alphabetical) order from the
# folder names that were actually found.
encoder_usable = label_order is not None and len(label_order) == num_classes
if not encoder_usable:
    label_order = sorted(set(true_labels))
    if len(label_order) != num_classes:
        # Still usable for reporting, but names may not line up with training
        print(f"[WARN] Dataset classes ({len(label_order)}) != model outputs ({num_classes}). "
              f"Proceeding with mapped indices; predictions will still be correct per index, "
              f"but labels may not align with training order.")
class_to_index = dict(zip(label_order, range(len(label_order))))

# Translate every ground-truth name to its class index; names missing from
# label_order become -1 so they can be excluded from scoring later.
true_idx = np.fromiter(
    (class_to_index.get(name, -1) for name in true_labels),
    dtype=np.int32,
    count=len(true_labels),
)

# ---------------------- PREDICT ----------------------
predict_ds = build_tf_dataset(file_paths, IMG_SIZE, BATCH)
# Hand the whole (already batched) dataset to a single predict() call.
# predict() iterates the batches itself and concatenates the outputs; calling
# it once per batch inside a Python loop repeats its setup cost for every
# batch and is the usage the Keras documentation advises against.
probs_all = model.predict(predict_ds, verbose=0)  # shape (N, num_classes)
pred_idx = probs_all.argmax(axis=1)   # most probable class index per image
conf_all = probs_all.max(axis=1)      # probability assigned to that class

# Map indices to label names (best-effort)
def idx_to_label(i):
    """Return the class name for index *i*.

    Indices outside ``label_order`` get the placeholder name ``class_<i>``.
    """
    in_range = 0 <= i < len(label_order)
    return label_order[i] if in_range else f"class_{i}"

pred_labels = list(map(idx_to_label, pred_idx))

# ---------------------- RESULTS: CSV/JSON & METRICS ----------------------
# One row per image: where it came from, what it is, and what was predicted
result_columns = {
    "file_path": file_paths,
    "true_label": true_labels,
    "true_idx": true_idx,
    "pred_label": pred_labels,
    "pred_idx": pred_idx,
    "confidence": conf_all,
}
results_df = pd.DataFrame(result_columns)

# Save predictions
pred_csv = OUT_DIR / "augmented_predictions.csv"
pred_json = OUT_DIR / "augmented_predictions.json"
# utf-8-sig writes a BOM at the start of the CSV
results_df.to_csv(pred_csv, index=False, encoding="utf-8-sig")
results_df.to_json(pred_json, orient="records", force_ascii=False, indent=2)
for saved_path in (pred_csv, pred_json):
    print(f"[SAVE] {saved_path}")

# Compute metrics only on rows with valid true_idx
mask_valid = results_df["true_idx"] >= 0
scored_rows = results_df.loc[mask_valid]
y_true = scored_rows["true_idx"].to_numpy()
y_pred = scored_rows["pred_idx"].to_numpy()

if len(y_true) == 0:
    print("\nNo valid true labels to score against (label mapping mismatch). "
          "Predictions CSV/JSON are still saved.")
else:
    acc = accuracy_score(y_true, y_pred)
    print(f"\nOverall accuracy on provided Augmented folders: {acc:.4f} "
          f"({mask_valid.sum()} images with known labels)")

    # Classification report
    # Every model output index (0..num_classes-1) gets a row and a name,
    # including classes that have no samples in the scored data.
    all_class_ids = list(range(num_classes))
    target_names = [idx_to_label(i) for i in all_class_ids]
    report_txt = classification_report(
        y_true,
        y_pred,
        labels=all_class_ids,
        target_names=target_names,
        digits=4,
        zero_division=0,
    )
    rep_path = OUT_DIR / "augmented_classification_report.txt"
    rep_path.write_text(report_txt, encoding="utf-8")
    print(report_txt)
    print(f"[SAVE] {rep_path}")

    # Confusion matrices (raw + normalized)
    cm = confusion_matrix(y_true, y_pred, labels=all_class_ids)
    # Row-normalise; the small epsilon keeps rows with no samples at 0
    # instead of dividing by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_norm = cm.astype(np.float64) / (row_totals + 1e-9)

    def plot_cm(matrix, labels, title, out_path, normalize=False):
        """Draw *matrix* as an annotated heat map and save it to *out_path*.

        Cells are printed with two decimals when ``normalize`` is true and
        as integers otherwise; text turns white on cells above half of the
        matrix maximum.
        """
        cell_fmt = ".2f" if normalize else "d"
        cutoff = matrix.max() / 2.0 if matrix.size else 0
        tick_pos = np.arange(len(labels))

        plt.figure(figsize=(7,6))
        plt.imshow(matrix, interpolation="nearest", aspect="auto")
        plt.title(title)
        plt.colorbar(fraction=0.046, pad=0.04)
        plt.xticks(tick_pos, labels, rotation=45, ha="right")
        plt.yticks(tick_pos, labels)
        # Rows are true classes (y axis), columns are predictions (x axis)
        for (row, col), cell in np.ndenumerate(matrix):
            text_color = "white" if cell > cutoff else "black"
            plt.text(col, row, format(cell, cell_fmt),
                     ha="center", va="center",
                     color=text_color,
                     fontsize=9)
        plt.ylabel("True label")
        plt.xlabel("Predicted label")
        plt.tight_layout()
        plt.savefig(out_path, dpi=220)
        plt.close()
        print(f"[SAVE] {out_path}")

    cm_path  = OUT_DIR / "augmented_confusion_matrix.png"
    cmn_path = OUT_DIR / "augmented_confusion_matrix_normalized.png"
    plot_cm(cm, target_names, "Confusion Matrix (Augmented)", cm_path, normalize=False)
    plot_cm(cm_norm, target_names, "Confusion Matrix (Augmented, Normalized)", cmn_path, normalize=True)

# ---------------------- SAMPLE PREVIEW ----------------------
# Show the first ten predictions as a quick sanity check
print("\nSample predictions:")
display_cols = ["file_path", "true_label", "pred_label", "confidence"]
preview = results_df[display_cols].head(10)
print(preview.to_string(index=False))

print("\nDone. All outputs saved to:", OUT_DIR)





[INFO] Loaded model: C:\Users\NXTWAVE\Downloads\Alzheimer’s Detection\neurowell_best.keras
[SAVE] C:\Users\NXTWAVE\Downloads\Alzheimer’s Detection\augmented_predictions.csv
[SAVE] C:\Users\NXTWAVE\Downloads\Alzheimer’s Detection\augmented_predictions.json

Overall accuracy on provided Augmented folders: 0.3957 (27520 images with known labels)
                  precision    recall  f1-score   support

    MildDemented     0.6182    0.0076    0.0150      8960
ModerateDemented     0.0000    0.0000    0.0000         0
     NonDemented     0.4151    0.8852    0.5651      9600
VeryMildDemented     0.3349    0.2593    0.2923      8960

        accuracy                         0.3957     27520
       macro avg     0.3420    0.2880    0.2181     27520
    weighted avg     0.4551    0.3957    0.2972     27520

[SAVE] C:\Users\NXTWAVE\Downloads\Alzheimer’s Detection\augmented_classification_report.txt
[SAVE] C:\Users\NXTWAVE\Downloads\Alzheimer’s Detection\augmented_confusion_matrix.png
[SAVE]