In [10]:
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from collections import Counter
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# ==============================
# CONFIG
# ==============================
DATA_DIRS = {
    "Hands Only + Sliding": r"E:\ASL_Citizen\NEW\Top_Classes_Landmarks_Preprocessed",
    "All Features + Sliding": r"E:\ASL_Citizen\NEW\Top_Classes_Landmarks_Preprocessed",
    "All Features w/o Sliding": r"E:\ASL_Citizen\NEW\Top_Classes_Landmarks_Preprocessed_method2",
    "Hands + Face + Sliding": r"E:\ASL_Citizen\NEW\Top_Classes_Landmarks_Preprocessed",
    "Hands + Pose + Sliding": r"E:\ASL_Citizen\NEW\Top_Classes_Landmarks_Preprocessed"
}

MODEL_PATHS = {
    "Hands Only + Sliding": "CNN_hands_only_sliding_mask_best_model.pth",
    "All Features + Sliding": "CNN_with_sliding&mask_best_model.pth",
    "All Features w/o Sliding": "CNN_without_sliding&mask_best_model.pth",
    "Hands + Face + Sliding": "CNN_hands_face_sliding_mask_best_model.pth",
    "Hands + Pose + Sliding": "CNN_hands_pose_sliding_mask_best_model.pth"
}

FEATURE_DIMS = {
    "Hands Only + Sliding": 258,  # Example: trained with 258 features
    "All Features + Sliding": 438,
    "All Features w/o Sliding": 438,
    "Hands + Face + Sliding": 438,
    "Hands + Pose + Sliding": 438
}

BATCH_SIZE = 16
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device:", DEVICE)

# ==============================
# DATASET
# ==============================
class LandmarkDataset(Dataset):
    def __init__(self, files, labels):
        self.files = files
        self.labels = labels

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        x = np.load(self.files[idx]).astype(np.float32)
        x = torch.tensor(x).permute(1, 0)  # (features, frames)
        y = torch.tensor(self.labels[idx], dtype=torch.long)
        return x, y

# ==============================
# LOAD FILES & LABELS
# ==============================
def load_files_and_labels(data_dir):
    files, labels = [], []
    for f in os.listdir(data_dir):
        if f.endswith(".npy") and "_mask" not in f:
            files.append(os.path.join(data_dir, f))
            gloss = f.rsplit("_", 1)[0].split("_")[0]
            labels.append(gloss)

    le = LabelEncoder()
    y_encoded = le.fit_transform(labels)

    # Filter classes with <2 samples
    label_counts = Counter(y_encoded)
    valid_idx = [i for i, y in enumerate(y_encoded) if label_counts[y] >= 2]

    files = [files[i] for i in valid_idx]
    y_encoded = y_encoded[valid_idx]

    le = LabelEncoder()
    y_encoded = le.fit_transform(y_encoded)
    num_classes = len(le.classes_)

    return files, y_encoded, num_classes, le

# ==============================
# CNN MODEL
# ==============================
class CNN1D(nn.Module):
    def __init__(self, input_features, num_classes):
        super().__init__()
        self.conv_block = nn.Sequential(
            nn.Conv1d(input_features, 256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.1),

            nn.Conv1d(256, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.05),

            nn.Conv1d(128, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.AdaptiveMaxPool1d(1)
        )
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.conv_block(x)
        x = x.squeeze(-1)
        return self.fc(x)

# ==============================
# LOAD MODEL WITH CHECKPOINT COMPATIBILITY
# ==============================
def load_model_with_compatibility(model, path):
    """
    Loads a checkpoint while fixing key name differences
    (conv vs conv_block) from old models.
    """
    checkpoint = torch.load(path, map_location=DEVICE)
    new_state_dict = {}

    for k, v in checkpoint.items():
        if k.startswith("conv."):
            new_key = k.replace("conv.", "conv_block.")
            new_state_dict[new_key] = v
        else:
            new_state_dict[k] = v
    model.load_state_dict(new_state_dict)
    return model

# ==============================
# EVALUATION FUNCTION
# ==============================
def evaluate_model(model, test_loader, label_encoder, model_path):
    model = load_model_with_compatibility(model, model_path)
    model.eval()

    all_preds, all_labels = [], []

    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            outputs = model(x)
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)

    # Convert to string labels
    all_preds_str = label_encoder.inverse_transform(all_preds)
    all_labels_str = label_encoder.inverse_transform(all_labels)

    return acc, all_preds_str, all_labels_str, np.array([torch.softmax(model(x.to(DEVICE)), dim=1).cpu().numpy() for x, _ in test_loader]).reshape(-1, len(label_encoder.classes_))

# ==============================
# MAIN LOOP
# ==============================
results = {}

for name, model_path in MODEL_PATHS.items():
    print(f"\nEvaluating model: {name}")
    data_dir = DATA_DIRS[name]
    files, y_encoded, num_classes, le = load_files_and_labels(data_dir)

    # Train/Val/Test split
    files_train, files_tmp, y_train, y_tmp = train_test_split(
        files, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42
    )
    files_val, files_test, y_val, y_test = train_test_split(
        files_tmp, y_tmp, test_size=0.5, stratify=y_tmp, random_state=42
    )

    test_dataset = LandmarkDataset(files_test, y_test)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

    # Use correct feature dimension
    FEATURE_DIM_MODEL = FEATURE_DIMS[name]
    model = CNN1D(FEATURE_DIM_MODEL, num_classes).to(DEVICE)

    acc, all_preds_str, all_labels_str, probs = evaluate_model(model, test_loader, le, model_path)

    results[name] = {
        "accuracy": acc,
        "all_preds": all_preds_str,
        "all_labels": all_labels_str,
        "probs": probs,
        "num_classes": num_classes
    }

    print(f"{name} Test Accuracy: {acc:.4f}")

# ==============================
# VISUALIZATIONS
# ==============================

# 1️⃣ Overall Accuracy
plt.figure(figsize=(10,6))
plt.bar(results.keys(), [r["accuracy"] for r in results.values()], color='skyblue')
plt.ylabel("Test Accuracy")
plt.title("Overall Test Accuracy Comparison")
plt.xticks(rotation=45, ha='right')
plt.ylim(0,1)
plt.tight_layout()
plt.show()

# 2️⃣ Per-Class Accuracy Heatmap (Matplotlib)
plt.figure(figsize=(12,6))
max_classes = max(r["num_classes"] for r in results.values())
per_class_matrix = np.zeros((len(results), max_classes))

for i, r in enumerate(results.values()):
    # Compute per-class accuracy
    per_class_acc = []
    for c in np.unique(r["all_labels"]):
        idxs = [i for i, lbl in enumerate(r["all_labels"]) if lbl == c]
        acc_c = np.mean([r["all_preds"][i] == r["all_labels"][i] for i in idxs])
        per_class_acc.append(acc_c)
    per_class_matrix[i, :len(per_class_acc)] = per_class_acc

plt.imshow(per_class_matrix, aspect='auto', cmap='YlGnBu', vmin=0, vmax=1)
plt.colorbar(label="Accuracy")
plt.yticks(range(len(results)), results.keys())
plt.xticks(range(max_classes), [f"C{i}" for i in range(max_classes)], rotation=90)
plt.xlabel("Class Index")
plt.ylabel("Model")
plt.title("Per-Class Accuracy Comparison")
plt.tight_layout()
plt.show()

# 3️⃣ Prediction Confidence Distribution
plt.figure(figsize=(12,6))
x_vals = np.linspace(0,1,200)
for name, r in results.items():
    top1_probs = r["probs"].max(axis=1)
    kde = gaussian_kde(top1_probs)
    plt.plot(x_vals, kde(x_vals), label=name)
plt.xlabel("Prediction Confidence (Top-1)")
plt.ylabel("Density")
plt.title("Prediction Confidence Distribution")
plt.legend()
plt.tight_layout()
plt.show()


Using device: cpu

Evaluating model: Hands Only + Sliding


RuntimeError: Error(s) in loading state_dict for CNN1D:
	size mismatch for conv_block.0.weight: copying a param with shape torch.Size([256, 438, 3]) from checkpoint, the shape in current model is torch.Size([256, 258, 3]).