# In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import os

# ==============================
# CONFIG (same as your original)
# ==============================
# Directory scanned for preprocessed landmark ``.npy`` files.
DATA_DIR = r"E:\ASL_Citizen\NEW\Top_Classes_Landmarks_Preprocessed"
# NOTE(review): not referenced anywhere in the visible code; presumably the
# per-sample frame count the arrays were padded/cropped to — confirm.
TARGET_FRAMES = 157
# Number of input channels passed to every CNN1D built in evaluate_model.
# NOTE(review): the same width is used for all checkpoints below, including the
# "Hands Only" / "Hands + ..." variants — confirm those were trained on inputs
# of this width, otherwise load_state_dict will reject them.
FEATURE_DIM = 438
# Batch size of the test DataLoader.
BATCH_SIZE = 16
# Run on the GPU when one is available, otherwise on the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Display name -> checkpoint file. The names become the labels in the printed
# summary and in the plots; the paths are relative to the working directory.
MODEL_PATHS = {
    "Hands Only + Sliding": "CNN_hands_only_sliding_mask_best_model.pth",
    "All Features + Sliding": "CNN_with_sliding&mask_best_model.pth",
    "All Features w/o Sliding": "CNN_without_sliding&mask_best_model.pth",
    "Hands + Face + Sliding": "CNN_hands_face_sliding_mask_best_model.pth",
    "Hands + Pose + Sliding": "CNN_hands_pose_sliding_mask_best_model.pth"
}

# ==============================
# DATASET
# ==============================
class LandmarkDataset(Dataset):
    """Map-style dataset pairing landmark ``.npy`` files with integer labels.

    Each item is loaded from disk on access, so only the file paths and the
    labels are held in memory.
    """

    def __init__(self, files, labels):
        """Store the parallel sequences of file paths and class labels."""
        self.files, self.labels = files, labels

    def __len__(self):
        """Return the number of samples (one per file path)."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(features, label)`` tensors.

        The stored 2-D array is cast to float32 and its two axes are swapped
        (described in the original code as giving ``(features, frames)``).
        The label is returned as a scalar ``torch.long`` tensor.
        """
        raw = np.load(self.files[idx])
        as_float = raw.astype(np.float32)
        sample = torch.tensor(as_float).permute(1, 0)
        target = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample, target

# ==============================
# LOAD FILES & LABELS
# ==============================
# Parallel lists: full path of each landmark file and its gloss (class name).
files, labels = [], []

# Collect every ".npy" array in DATA_DIR, skipping files whose name contains "_mask".
# NOTE(review): os.listdir returns entries in arbitrary order, and the seeded
# splits below depend on the order of `files` — confirm the training script
# enumerated the directory the same way, or the test split may differ.
for f in os.listdir(DATA_DIR):
    if f.endswith(".npy") and "_mask" not in f:
        files.append(os.path.join(DATA_DIR, f))
        # The gloss is the part of the file name before the first underscore
        # (the whole name if it has none).
        # NOTE(review): a gloss that itself contains "_" would be truncated —
        # confirm against the file naming scheme.
        gloss = f.rsplit("_", 1)[0].split("_")[0]
        labels.append(gloss)

# First encoding: gloss strings -> integer ids.
le = LabelEncoder()
y_encoded = le.fit_transform(labels)

# Keep only samples whose class has at least 2 samples
# (`labels` itself is left unfiltered).
label_counts = Counter(y_encoded)
valid_idx = [i for i, y in enumerate(y_encoded) if label_counts[y] >= 2]

files = [files[i] for i in valid_idx]
y_encoded = y_encoded[valid_idx]

# Second encoding: re-number the surviving ids so they are contiguous.
# `le` is now fitted on integer ids, so its classes_ are no longer the gloss strings.
le = LabelEncoder()
y_encoded = le.fit_transform(y_encoded)
num_classes = len(le.classes_)

# Train/Val/Test split (same as original): 80% train, then the remaining 20%
# is halved into validation and test. Both splits are stratified and seeded.
from sklearn.model_selection import train_test_split
files_train, files_tmp, y_train, y_tmp = train_test_split(
    files, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42
)
files_val, files_test, y_val, y_test = train_test_split(
    files_tmp, y_tmp, test_size=0.5, stratify=y_tmp, random_state=42
)

# Only the test split is evaluated here; shuffle=False keeps the sample order
# identical for every model.
test_dataset = LandmarkDataset(files_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# ==============================
# MODEL
# ==============================
class CNN1D(nn.Module):
    """1D convolutional classifier with three conv stages and a linear head.

    Each stage is Conv1d(kernel 3, padding 1) -> BatchNorm1d -> ReLU. The
    first two stages end in dropout (0.1, then 0.05) and the last one in an
    adaptive max pool that reduces the final axis to length 1.
    """

    def __init__(self, input_features, num_classes):
        """Build the network.

        Args:
            input_features: Number of input channels of the first convolution.
            num_classes: Number of output logits.
        """
        super().__init__()
        widths = [input_features, 256, 128, 64]
        # Layer that closes each stage, in stage order.
        stage_ends = [nn.Dropout(0.1), nn.Dropout(0.05), nn.AdaptiveMaxPool1d(1)]

        # The layers are kept in one flat Sequential so that parameter names
        # (conv.0, conv.1, ..., conv.9) match previously saved checkpoints.
        layers = []
        for c_in, c_out, stage_end in zip(widths, widths[1:], stage_ends):
            layers += [
                nn.Conv1d(c_in, c_out, 3, padding=1),
                nn.BatchNorm1d(c_out),
                nn.ReLU(),
                stage_end,
            ]
        self.conv = nn.Sequential(*layers)
        self.fc = nn.Linear(widths[-1], num_classes)

    def forward(self, x):
        """Return class logits for ``x`` (channels on axis 1, as Conv1d expects)."""
        pooled = self.conv(x)
        # Drop the length-1 axis left by the adaptive max pool.
        return self.fc(pooled.squeeze(-1))

# ==============================
# EVALUATION FUNCTION
# ==============================
def evaluate_model(model_path):
    """Evaluate one saved CNN1D checkpoint on the test loader.

    Args:
        model_path: Path to a ``state_dict`` file for a ``CNN1D`` built with
            ``FEATURE_DIM`` input channels and ``num_classes`` outputs.

    Returns:
        A tuple ``(acc, per_class_acc, probs)``:
        ``acc`` is the overall accuracy over the test loader;
        ``per_class_acc`` is a list with one entry per class index, holding
        the fraction of that class's samples predicted correctly (``0`` when
        the class has no samples in the test loader);
        ``probs`` is an array of the softmax outputs, one row per sample.
    """
    model = CNN1D(FEATURE_DIM, num_classes).to(DEVICE)
    # NOTE(review): torch.load unpickles the file — only load checkpoints from
    # a trusted source.
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.eval()

    all_preds, all_labels, all_probs = [], [], []

    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            outputs = model(x)
            probs = torch.softmax(outputs, dim=1)
            preds = outputs.argmax(1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    # Convert once up front; the per-class loop below only indexes these arrays.
    labels_arr = np.asarray(all_labels)
    preds_arr = np.asarray(all_preds)
    correct = preds_arr == labels_arr

    acc = accuracy_score(labels_arr, preds_arr)

    per_class_acc = []
    for class_idx in range(num_classes):
        in_class = labels_arr == class_idx
        # A class absent from the test split is reported as 0 rather than NaN.
        per_class_acc.append(correct[in_class].mean() if in_class.any() else 0)

    return acc, per_class_acc, np.array(all_probs)

# ==============================
# EVALUATE ALL MODELS
# ==============================
# Per-model results, keyed by the display name from MODEL_PATHS.
results = {}
for name in MODEL_PATHS:
    acc, class_scores, softmax_rows = evaluate_model(MODEL_PATHS[name])
    results[name] = dict(accuracy=acc, per_class_acc=class_scores, probs=softmax_rows)
    print(f"{name} Test Accuracy: {acc:.4f}")

# ==============================
# VISUALIZATIONS
# ==============================

# 1) Overall accuracy comparison: one bar per evaluated model.
model_names = list(results)
overall_scores = [results[model]["accuracy"] for model in model_names]

plt.figure(figsize=(10, 6))
plt.bar(model_names, overall_scores, color='skyblue')
plt.title("Overall Test Accuracy Comparison")
plt.ylabel("Test Accuracy")
plt.ylim(0, 1)  # accuracy is a fraction, so the axis always spans 0..1
plt.xticks(rotation=45, ha='right')
plt.show()

# 2) Per-class accuracy heatmap: one row per model, one column per class index.
per_class_matrix = np.array([results[model]["per_class_acc"] for model in results])
class_ticks = [f"C{i}" for i in range(num_classes)]

plt.figure(figsize=(12, 6))
sns.heatmap(
    per_class_matrix,
    annot=True,
    fmt=".2f",
    cmap='YlGnBu',
    xticklabels=class_ticks,
    yticklabels=list(results),
)
plt.title("Per-Class Accuracy Comparison")
plt.xlabel("Class Index")
plt.ylabel("Model")
plt.show()

# 3) Distribution of the top-1 softmax probability, one density curve per model.
plt.figure(figsize=(12, 6))
for name in results:
    # The highest class probability of each sample is its prediction confidence.
    top1_probs = np.max(results[name]["probs"], axis=1)
    sns.kdeplot(top1_probs, label=name, fill=True)
plt.title("Prediction Confidence Distribution")
plt.xlabel("Prediction Confidence (Top-1)")
plt.ylabel("Density")
plt.legend()
plt.show()
