In [None]:
from google.colab import files

# Upload the trained checkpoint files through the Colab upload helper.
# The recorded output shows each .pth being saved under its own name in the
# working directory, which is where the loader calls later look for them.
uploaded = files.upload()

Saving bam_resnet50.pth to bam_resnet50.pth
Saving cbam_resnet50.pth to cbam_resnet50.pth
Saving densenet121.pth to densenet121.pth
Saving efficientnet_b2.pth to efficientnet_b2.pth
Saving googlenet.pth to googlenet.pth
Saving inception_v3.pth to inception_v3.pth
Saving resnet34.pth to resnet34.pth
Saving resnet152.pth to resnet152.pth
Saving simplecnn.pth to simplecnn.pth
Saving vgg19.pth to vgg19.pth


In [None]:
from google.colab import files
files.upload()
!mkdir ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json
mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [None]:
!kaggle datasets download -d msambare/fer2013
!unzip fer2013.zip -d ./data

Dataset URL: https://www.kaggle.com/datasets/msambare/fer2013
License(s): DbCL-1.0
fer2013.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  fer2013.zip
replace ./data/test/angry/PrivateTest_10131363.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: ./data/test/angry/PrivateTest_10131363.jpg  
replace ./data/test/angry/PrivateTest_10304478.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: ./data/test/angry/PrivateTest_10304478.jpg  
replace ./data/test/angry/PrivateTest_1054527.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 
error:  invalid response [{ENTER}]
replace ./data/test/angry/PrivateTest_1054527.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 
error:  invalid response [{ENTER}]
replace ./data/test/angry/PrivateTest_1054527.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: a
error:  invalid response [a]
replace ./data/test/angry/PrivateTest_1054527.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 
error:  invalid response [{ENTER}]
repla

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import timm
from tqdm import tqdm

# Prefer the GPU whenever PyTorch can see one.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# ----------------------------
# 1. Define Transforms
# ----------------------------
# Per-channel statistics shared by both 3-channel pipelines.
_RGB_MEAN = [0.485, 0.456, 0.406]
_RGB_STD = [0.229, 0.224, 0.225]


def _rgb_pipeline(side):
    """Build the 3-channel preprocessing pipeline for a square ``side`` x ``side`` input."""
    return transforms.Compose([
        transforms.Grayscale(num_output_channels=3),  # replicate the single gray channel to 3
        transforms.Resize((side, side)),
        transforms.ToTensor(),
        transforms.Normalize(_RGB_MEAN, _RGB_STD),
    ])


# Default pipeline for the 224x224 backbones.
transform_common = _rgb_pipeline(224)

# InceptionV3 needs 299x299
transform_inception = _rgb_pipeline(299)

# SimpleCNN expects grayscale (1 channel) input at 48x48.
# NOTE(review): the recorded SimpleCNN accuracy is far below chance for 7
# classes; verify this preprocessing matches what the checkpoint was trained with.
transform_grayscale = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
])

# ----------------------------
# 2. Define Custom Models
# ----------------------------
class SimpleCNN(nn.Module):
    """Small three-stage CNN for single-channel images.

    Each stage is conv(3x3, padding 1) -> batch norm -> ReLU -> 2x2 max pool.
    The first linear layer expects a 256 x 6 x 6 feature map, which is what
    three 2x2 poolings produce from a 48x48 input.

    Args:
        num_classes: size of the output logit vector.
    """

    def __init__(self, num_classes=7):
        super().__init__()
        # Stage 1: 1 -> 64 channels
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        # One pooling layer is shared by all three stages.
        self.pool = nn.MaxPool2d(2, 2)
        # Stage 2: 64 -> 128 channels
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        # Stage 3: 128 -> 256 channels
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        # Classifier head
        self.fc1 = nn.Linear(256 * 6 * 6, 256)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = self.pool(F.relu(norm(conv(x))))
        flat = x.view(x.size(0), -1)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        return self.fc2(hidden)

class ChannelAttention(nn.Module):
    """Per-channel gate computed from spatially pooled features.

    The input is reduced over its two spatial dimensions with both a mean and
    a max; each pooled tensor goes through the same 1x1-conv bottleneck
    (``in_planes -> in_planes // ratio -> in_planes``), the two results are
    added, and a sigmoid maps the sum into (0, 1).

    Args:
        in_planes: number of input channels.
        ratio: channel reduction factor of the bottleneck.
    """

    def __init__(self, in_planes, ratio=16):
        super().__init__()
        reduced = in_planes // ratio
        self.fc1 = nn.Conv2d(in_planes, reduced, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(reduced, in_planes, 1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def _bottleneck(self, pooled):
        """Apply the shared fc1 -> ReLU -> fc2 stack to a pooled tensor."""
        return self.fc2(self.relu1(self.fc1(pooled)))

    def forward(self, x):
        """Return a gate of shape (batch, in_planes, 1, 1)."""
        spatial_mean = x.mean(dim=(2, 3), keepdim=True)
        # Max over height first, then over width.
        spatial_max = x.max(dim=2, keepdim=True).values.max(dim=3, keepdim=True).values
        gate_logits = self._bottleneck(spatial_mean) + self._bottleneck(spatial_max)
        return self.sigmoid(gate_logits)

class SpatialAttention(nn.Module):
    """Per-location gate computed from channel-pooled features.

    The channel dimension is collapsed with a mean and a max, the two maps are
    stacked into a 2-channel tensor, and a single bias-free convolution plus
    sigmoid turns them into one gating map with the input's spatial size.

    Args:
        kernel_size: size of the square convolution kernel; padding is
            ``kernel_size // 2``.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        half = kernel_size // 2
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=half, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return a gate of shape (batch, 1, H, W)."""
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True).values
        pooled = torch.cat((channel_mean, channel_max), dim=1)
        return self.sigmoid(self.conv1(pooled))

class BAM(nn.Module):
    """Attention block: channel gating followed by spatial gating.

    The input is first scaled by ``ChannelAttention``; the spatial gate is
    then computed from, and applied to, that channel-refined tensor.

    Args:
        in_planes: number of channels of the feature map being refined.
    """

    def __init__(self, in_planes):
        super().__init__()
        self.ca = ChannelAttention(in_planes)
        self.sa = SpatialAttention()

    def forward(self, x):
        """Return the refined feature map (same shape as ``x``)."""
        channel_refined = self.ca(x) * x
        return self.sa(channel_refined) * channel_refined

class BAM_ResNet50(nn.Module):
    """ResNet-50 with a ``BAM`` block after each of the four residual stages.

    The backbone is a torchvision ResNet-50 created without pretrained
    weights; its final linear layer is replaced by a ``num_classes``-way head.

    Args:
        num_classes: size of the output logit vector.
    """

    def __init__(self, num_classes=7):
        super().__init__()
        self.resnet = models.resnet50(weights=None)
        # One attention block per stage, sized to that stage's output channels.
        self.bam1 = BAM(256)
        self.bam2 = BAM(512)
        self.bam3 = BAM(1024)
        self.bam4 = BAM(2048)
        self.resnet.fc = nn.Linear(2048, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        backbone = self.resnet
        # Stem
        for layer in (backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool):
            x = layer(x)
        # Residual stages, each followed by its attention block
        stage_pairs = (
            (backbone.layer1, self.bam1),
            (backbone.layer2, self.bam2),
            (backbone.layer3, self.bam3),
            (backbone.layer4, self.bam4),
        )
        for stage, attention in stage_pairs:
            x = attention(stage(x))
        # Head
        pooled = torch.flatten(backbone.avgpool(x), 1)
        return backbone.fc(pooled)

class CBAMBlock(nn.Module):
    """Channel gating followed by spatial gating, with configurable sub-modules.

    Args:
        channels: number of channels of the feature map being refined.
        ratio: reduction factor passed to ``ChannelAttention``.
        kernel_size: kernel size passed to ``SpatialAttention``.
    """

    def __init__(self, channels, ratio=16, kernel_size=7):
        super().__init__()
        self.ca = ChannelAttention(channels, ratio)
        self.sa = SpatialAttention(kernel_size)

    def forward(self, x):
        """Return the refined feature map (same shape as ``x``)."""
        channel_refined = x * self.ca(x)
        # The spatial gate is computed from the already channel-refined tensor.
        return channel_refined * self.sa(channel_refined)

class CBAM_ResNet50(nn.Module):
    """ResNet-50 with a ``CBAMBlock`` after each of the four residual stages.

    The backbone is a torchvision ResNet-50 created without pretrained
    weights; its final linear layer is replaced by a ``num_classes``-way head.

    Args:
        num_classes: size of the output logit vector.
    """

    def __init__(self, num_classes=7):
        super().__init__()
        self.resnet = models.resnet50(weights=None)
        # One attention block per stage, sized to that stage's output channels.
        self.cbam1 = CBAMBlock(256)
        self.cbam2 = CBAMBlock(512)
        self.cbam3 = CBAMBlock(1024)
        self.cbam4 = CBAMBlock(2048)
        head_inputs = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(head_inputs, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        backbone = self.resnet
        # Stem
        for layer in (backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool):
            x = layer(x)
        # Residual stages, each followed by its attention block
        stage_pairs = (
            (backbone.layer1, self.cbam1),
            (backbone.layer2, self.cbam2),
            (backbone.layer3, self.cbam3),
            (backbone.layer4, self.cbam4),
        )
        for stage, attention in stage_pairs:
            x = attention(stage(x))
        # Head
        pooled = torch.flatten(backbone.avgpool(x), 1)
        return backbone.fc(pooled)

# ----------------------------
# 3. Model Loaders
# ----------------------------
def load_resnet34(path, num_classes):
    """Rebuild a ResNet-34 classifier and restore its weights from ``path``.

    The torchvision architecture is created without pretrained weights, its
    final linear layer is replaced by a ``num_classes``-way head, and the
    checkpoint is loaded with the default (strict) key matching.

    Returns:
        The model on the notebook-wide ``device``, in eval mode.
    """
    net = models.resnet34(weights=None)
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    checkpoint = torch.load(path, map_location=device)
    net.load_state_dict(checkpoint)
    net = net.to(device)
    return net.eval()

def load_resnet152(path, num_classes):
    """Rebuild a ResNet-152 classifier and restore its weights from ``path``.

    The torchvision architecture is created without pretrained weights, its
    final linear layer is replaced by a ``num_classes``-way head, and the
    checkpoint is loaded with the default (strict) key matching.

    Returns:
        The model on the notebook-wide ``device``, in eval mode.
    """
    net = models.resnet152(weights=None)
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    checkpoint = torch.load(path, map_location=device)
    net.load_state_dict(checkpoint)
    net = net.to(device)
    return net.eval()

def load_densenet121(path, num_classes):
    """Rebuild a DenseNet-121 classifier and restore its weights from ``path``.

    The torchvision architecture is created without pretrained weights, its
    ``classifier`` layer is replaced by a ``num_classes``-way head, and the
    checkpoint is loaded with the default (strict) key matching.

    Returns:
        The model on the notebook-wide ``device``, in eval mode.
    """
    net = models.densenet121(weights=None)
    head_inputs = net.classifier.in_features
    net.classifier = nn.Linear(head_inputs, num_classes)
    checkpoint = torch.load(path, map_location=device)
    net.load_state_dict(checkpoint)
    net = net.to(device)
    return net.eval()

def load_efficientnet_b2(path, num_classes):
    """Rebuild a timm EfficientNet-B2 and restore its weights from ``path``.

    Checkpoint keys may carry wrapper prefixes ("module." / "model."); these
    are stripped from the front of each key before loading. Loading stays
    non-strict, but any keys that did not line up are reported instead of
    being silently ignored.

    Args:
        path: checkpoint file containing a state dict.
        num_classes: size of the classification head.

    Returns:
        The model on the notebook-wide ``device``, in eval mode.
    """
    model = timm.create_model("efficientnet_b2", pretrained=False, num_classes=num_classes)
    state = torch.load(path, map_location=device)

    # Strip wrapper prefixes only at the start of a key. A plain
    # str.replace would also rewrite matches in the middle of a parameter
    # name (e.g. "...se_module.fc.weight") and corrupt it.
    wrapper_prefixes = ("module.", "model.")
    new_state = {}
    for key, value in state.items():
        clean = key
        stripped = True
        while stripped:
            stripped = False
            for prefix in wrapper_prefixes:
                if clean.startswith(prefix):
                    clean = clean[len(prefix):]
                    stripped = True
        new_state[clean] = value

    result = model.load_state_dict(new_state, strict=False)
    # strict=False tolerates mismatches; surface them so a checkpoint that
    # barely matches the architecture does not go unnoticed.
    if result.missing_keys or result.unexpected_keys:
        print(f"WARNING [EfficientNet-B2]: {len(result.missing_keys)} missing and "
              f"{len(result.unexpected_keys)} unexpected checkpoint keys "
              f"(missing e.g. {result.missing_keys[:3]}, unexpected e.g. {result.unexpected_keys[:3]})")
    return model.to(device).eval()

def load_googlenet(path, num_classes, transform_input=False):
    """Rebuild a torchvision GoogLeNet and restore its weights from ``path``.

    The model is created with auxiliary classifiers so its layout matches the
    checkpoint. The final layers of the main and auxiliary heads are replaced
    by ``num_classes``-way layers, and the checkpoint's ``aux*.fc2`` entries
    are dropped before loading. Any other key mismatch is reported.

    Args:
        path: checkpoint file containing a state dict.
        num_classes: size of the classification head.
        transform_input: forwarded to ``models.googlenet``. The default keeps
            the previous behaviour.
            NOTE(review): torchvision enables ``transform_input`` when the
            model is built from pretrained weights. If this checkpoint was
            fine-tuned from such a model, evaluation should pass ``True``;
            confirm against the training code.

    Returns:
        The model on the notebook-wide ``device``, in eval mode.
    """
    # Load with aux_logits=True to match training checkpoint structure
    model = models.googlenet(weights=None, aux_logits=True, transform_input=transform_input)
    model.fc = nn.Linear(model.fc.in_features, num_classes)

    # Modify aux classifiers to match our num_classes
    if hasattr(model, 'aux1') and model.aux1 is not None:
        model.aux1.fc2 = nn.Linear(model.aux1.fc2.in_features, num_classes)
    if hasattr(model, 'aux2') and model.aux2 is not None:
        model.aux2.fc2 = nn.Linear(model.aux2.fc2.in_features, num_classes)

    state_dict = torch.load(path, map_location=device)

    # Filter out incompatible auxiliary classifier weights (they have 1000 classes from pretrained)
    skipped_prefixes = ('aux1.fc2.', 'aux2.fc2.')
    filtered_state = {k: v for k, v in state_dict.items()
                      if not k.startswith(skipped_prefixes)}

    result = model.load_state_dict(filtered_state, strict=False)
    # The filtered aux heads are expected to be missing; anything else means
    # the checkpoint and the architecture disagree and should be looked at.
    missing = [k for k in result.missing_keys if not k.startswith(skipped_prefixes)]
    if missing or result.unexpected_keys:
        print(f"WARNING [GoogLeNet]: {len(missing)} missing and "
              f"{len(result.unexpected_keys)} unexpected checkpoint keys "
              f"(missing e.g. {missing[:3]}, unexpected e.g. {result.unexpected_keys[:3]})")
    return model.to(device).eval()

def load_inception_v3(path, num_classes, transform_input=False):
    """Rebuild a torchvision Inception v3 and restore its weights from ``path``.

    The model is created with the auxiliary classifier so its layout matches
    the checkpoint. The main and auxiliary heads are replaced by
    ``num_classes``-way layers, and the checkpoint's ``AuxLogits.fc`` entries
    are dropped before loading. Any other key mismatch is reported. The
    auxiliary output is switched off afterwards.

    Args:
        path: checkpoint file containing a state dict.
        num_classes: size of the classification head.
        transform_input: forwarded to ``models.inception_v3``. The default
            keeps the previous behaviour.
            NOTE(review): torchvision enables ``transform_input`` when the
            model is built from pretrained weights. If this checkpoint was
            fine-tuned from such a model, evaluation should pass ``True``;
            confirm against the training code.

    Returns:
        The model on the notebook-wide ``device``, in eval mode.
    """
    # Load with aux_logits=True to match checkpoint structure from training
    model = models.inception_v3(weights=None, aux_logits=True, transform_input=transform_input)
    model.fc = nn.Linear(model.fc.in_features, num_classes)

    # Modify aux classifier
    if model.AuxLogits is not None:
        model.AuxLogits.fc = nn.Linear(model.AuxLogits.fc.in_features, num_classes)

    state_dict = torch.load(path, map_location=device)

    # Filter out incompatible auxiliary classifier weights (they have 1000 classes from pretrained)
    skipped_prefix = 'AuxLogits.fc.'
    filtered_state = {k: v for k, v in state_dict.items()
                      if not k.startswith(skipped_prefix)}

    result = model.load_state_dict(filtered_state, strict=False)
    # The filtered aux head is expected to be missing; anything else means
    # the checkpoint and the architecture disagree and should be looked at.
    missing = [k for k in result.missing_keys if not k.startswith(skipped_prefix)]
    if missing or result.unexpected_keys:
        print(f"WARNING [InceptionV3]: {len(missing)} missing and "
              f"{len(result.unexpected_keys)} unexpected checkpoint keys "
              f"(missing e.g. {missing[:3]}, unexpected e.g. {result.unexpected_keys[:3]})")

    # Now disable aux_logits for inference
    model.aux_logits = False
    return model.to(device).eval()

def load_vgg19(path, num_classes):
    """Rebuild a VGG-19 classifier and restore its weights from ``path``.

    The torchvision architecture is created without pretrained weights, the
    layer at ``classifier[6]`` is replaced by a ``num_classes``-way linear
    layer, and the checkpoint is loaded with the default (strict) key matching.

    Returns:
        The model on the notebook-wide ``device``, in eval mode.
    """
    net = models.vgg19(weights=None)
    head_inputs = net.classifier[6].in_features
    net.classifier[6] = nn.Linear(head_inputs, num_classes)
    checkpoint = torch.load(path, map_location=device)
    net.load_state_dict(checkpoint)
    net = net.to(device)
    return net.eval()

def load_simplecnn(path, num_classes):
    """Instantiate ``SimpleCNN`` and restore its weights from ``path``.

    The checkpoint is loaded with the default (strict) key matching.

    Returns:
        The model on the notebook-wide ``device``, in eval mode.
    """
    net = SimpleCNN(num_classes)
    checkpoint = torch.load(path, map_location=device)
    net.load_state_dict(checkpoint)
    net = net.to(device)
    return net.eval()

def load_bam_resnet50(path, num_classes):
    """Instantiate ``BAM_ResNet50`` and restore its weights from ``path``.

    Loading stays non-strict, but keys that did not line up between the
    checkpoint and the model are reported rather than silently ignored
    (a mismatched layer would otherwise keep its random initialisation).

    Args:
        path: checkpoint file containing a state dict.
        num_classes: size of the classification head.

    Returns:
        The model on the notebook-wide ``device``, in eval mode.
    """
    model = BAM_ResNet50(num_classes=num_classes)
    state = torch.load(path, map_location=device)
    result = model.load_state_dict(state, strict=False)
    if result.missing_keys or result.unexpected_keys:
        print(f"WARNING [BAM-ResNet50]: {len(result.missing_keys)} missing and "
              f"{len(result.unexpected_keys)} unexpected checkpoint keys "
              f"(missing e.g. {result.missing_keys[:3]}, unexpected e.g. {result.unexpected_keys[:3]})")
    return model.to(device).eval()

def load_cbam_resnet50(path, num_classes):
    """Instantiate ``CBAM_ResNet50`` and restore its weights from ``path``.

    Loading stays non-strict, but keys that did not line up between the
    checkpoint and the model are reported rather than silently ignored
    (a mismatched layer would otherwise keep its random initialisation).

    Args:
        path: checkpoint file containing a state dict.
        num_classes: size of the classification head.

    Returns:
        The model on the notebook-wide ``device``, in eval mode.
    """
    model = CBAM_ResNet50(num_classes=num_classes)
    state = torch.load(path, map_location=device)
    result = model.load_state_dict(state, strict=False)
    if result.missing_keys or result.unexpected_keys:
        print(f"WARNING [CBAM-ResNet50]: {len(result.missing_keys)} missing and "
              f"{len(result.unexpected_keys)} unexpected checkpoint keys "
              f"(missing e.g. {result.missing_keys[:3]}, unexpected e.g. {result.unexpected_keys[:3]})")
    return model.to(device).eval()

# ----------------------------
# 4. Load Dataset
# ----------------------------
test_dir = "./data/test"

# One ImageFolder view of the same directory per preprocessing pipeline
# (224x224 RGB, 299x299 RGB, 48x48 single-channel).
test_dataset_224, test_dataset_299, test_dataset_gray = (
    datasets.ImageFolder(test_dir, transform=pipeline)
    for pipeline in (transform_common, transform_inception, transform_grayscale)
)

# shuffle=False keeps sample order identical across the three loaders, which
# the ensemble code relies on when it lines up per-model predictions.
test_loader_224, test_loader_299, test_loader_gray = (
    DataLoader(dataset, batch_size=32, shuffle=False)
    for dataset in (test_dataset_224, test_dataset_299, test_dataset_gray)
)

# The class count comes from the sub-folder names found under test_dir.
num_classes = len(test_dataset_224.classes)
print(f"Number of classes: {num_classes}")

# ----------------------------
# 5. Load Models
# ----------------------------
print("\nLoading models...")
# (display name, loader function, checkpoint file, loader with matching preprocessing)
_MODEL_SPECS = (
    ("ResNet34", load_resnet34, "resnet34.pth", test_loader_224),
    ("ResNet152", load_resnet152, "resnet152.pth", test_loader_224),
    ("DenseNet121", load_densenet121, "densenet121.pth", test_loader_224),
    ("EfficientNet-B2", load_efficientnet_b2, "efficientnet_b2.pth", test_loader_224),
    ("GoogLeNet", load_googlenet, "googlenet.pth", test_loader_224),
    ("InceptionV3", load_inception_v3, "inception_v3.pth", test_loader_299),
    ("VGG19", load_vgg19, "vgg19.pth", test_loader_224),
    ("SimpleCNN", load_simplecnn, "simplecnn.pth", test_loader_gray),  # grayscale loader
    ("BAM-ResNet50", load_bam_resnet50, "bam_resnet50.pth", test_loader_224),
    ("CBAM-ResNet50", load_cbam_resnet50, "cbam_resnet50.pth", test_loader_224),
)

# Checkpoints are loaded in table order; each entry is (name, model, loader).
models_list = [
    (label, build(checkpoint, num_classes), batches)
    for label, build, checkpoint, batches in _MODEL_SPECS
]

# ----------------------------
# 6. Evaluation Functions
# ----------------------------
def evaluate_single_model(model, dataloader):
    """Return the top-1 accuracy of ``model`` over ``dataloader``.

    Args:
        model: module mapping a batch of inputs to per-class scores of shape
            (batch, num_classes). Its train/eval mode is left untouched.
        dataloader: iterable of ``(inputs, labels)`` tensor batches.

    Returns:
        Fraction of samples whose arg-max score equals the label.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    # Run on the device the model actually lives on, so the function does not
    # depend on a notebook-level variable agreeing with it. The global
    # ``device`` is only a fallback for parameter-free models.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else device

    correct, total = 0, 0
    with torch.no_grad():
        for imgs, labels in dataloader:
            imgs, labels = imgs.to(target), labels.to(target)
            outputs = model(imgs)
            _, predicted = outputs.max(1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        raise ValueError("dataloader yielded no samples; accuracy is undefined")
    return correct / total

def evaluate_ensemble(models_list):
    """Hard-voting ensemble accuracy over models that may use different loaders.

    Each model is run over its own loader (so each sees inputs at the size it
    expects), its arg-max predictions are collected, and the per-sample mode
    across models is taken as the ensemble prediction (ties are resolved by
    ``torch.mode``).

    Labels are taken from the loaders that were passed in rather than from a
    notebook-level loader, and every loader must yield the same label
    sequence; otherwise predictions from different models would be compared
    for different samples.

    Args:
        models_list: sequence of ``(name, model, loader)`` tuples.

    Returns:
        Fraction of samples where the majority vote equals the label.

    Raises:
        ValueError: if ``models_list`` is empty or the loaders disagree on labels.
    """
    if not models_list:
        raise ValueError("models_list is empty; nothing to ensemble")

    all_predictions = []
    true_labels = None

    for name, model, loader in models_list:
        # Run on the device the model lives on; the global is only a fallback.
        first_param = next(model.parameters(), None)
        target = first_param.device if first_param is not None else device

        model_preds, model_labels = [], []
        with torch.no_grad():
            for imgs, labels in loader:
                outputs = model(imgs.to(target))
                _, preds = outputs.max(1)
                model_preds.append(preds.cpu())
                model_labels.append(labels.cpu())
        model_labels = torch.cat(model_labels)

        # The first loader defines the reference labels; the rest must match.
        if true_labels is None:
            true_labels = model_labels
        elif not torch.equal(model_labels, true_labels):
            raise ValueError(
                f"loader for {name} yields labels in a different order than the first loader"
            )
        all_predictions.append(torch.cat(model_preds))

    # Stack predictions: (num_models, num_samples), then majority vote per sample.
    final_preds = torch.mode(torch.stack(all_predictions), dim=0).values

    correct = (final_preds == true_labels).sum().item()
    return correct / len(true_labels)

# ----------------------------
# 7. Run Evaluations
# ----------------------------
# 50-character rule used to frame each section of the report.
rule = "=" * 50

# Per-model accuracy, each model evaluated on its own loader.
print("\n" + rule)
print("Individual Model Accuracies:")
print(rule)
for name, model, loader in models_list:
    acc = evaluate_single_model(model, loader)
    print(f"{name:20s}: {acc*100:6.2f}%")

# Majority-vote accuracy over every loaded model.
print("\n" + rule)
ensemble_acc = evaluate_ensemble(models_list)
print(f"Ensemble Accuracy: {ensemble_acc*100:.2f}%")
print(rule)

Using device: cuda
Number of classes: 7

Loading models...

Individual Model Accuracies:
ResNet34            :  66.49%
ResNet152           :  65.13%
DenseNet121         :  68.22%
EfficientNet-B2     :  69.91%
GoogLeNet           :  54.39%
InceptionV3         :  34.69%
VGG19               :  66.62%
SimpleCNN           :   2.28%
BAM-ResNet50        :  65.87%
CBAM-ResNet50       :  63.43%

Ensemble Accuracy: 72.19%


In [None]:
# Keep only models at or above the accuracy threshold before ensembling.
# NOTE(review): these accuracies are measured on the same loaders the
# ensemble is scored on afterwards, so the filtered results are optimistic.
banner = "=" * 60

print("\n" + banner)
print("OPTIMIZED ENSEMBLE - Filtering Poor Performers")
print(banner)

# Minimum individual accuracy for ensemble membership.
ACCURACY_THRESHOLD = 0.60  # 60%

# Score every model once and keep (name, model, loader, accuracy) records.
model_accuracies = []
for name, model, loader in models_list:
    acc = evaluate_single_model(model, loader)
    print(f"{name:20s}: {acc*100:6.2f}%")
    model_accuracies.append((name, model, loader, acc))

# Records that reach the threshold.
filtered_models = [record for record in model_accuracies
                   if record[3] >= ACCURACY_THRESHOLD]

print("\n" + banner)
print(f"Models above {ACCURACY_THRESHOLD*100:.0f}% threshold: {len(filtered_models)}/{len(models_list)}")
print(banner)
for name, _, _, acc in filtered_models:
    print(f"✓ {name:20s}: {acc*100:6.2f}%")

# Records that fall short, reduced to (name, accuracy) for reporting.
print("\n" + banner)
print("Excluded models:")
print(banner)
excluded = [(record[0], record[3]) for record in model_accuracies
            if record[3] < ACCURACY_THRESHOLD]
for name, acc in excluded:
    print(f"✗ {name:20s}: {acc*100:6.2f}%")

# evaluate_ensemble takes (name, model, loader) triples, so drop the accuracy.
filtered_models_list = [record[:3] for record in filtered_models]

# Method 1: unweighted majority vote over the surviving models.
print("\n" + banner)
print("METHOD 1: Majority Voting (Filtered Models)")
print(banner)
ensemble_acc_filtered = evaluate_ensemble(filtered_models_list)
print(f"Ensemble Accuracy: {ensemble_acc_filtered*100:.2f}%")

# Method 2: accuracy-weighted voting (function defined below).
print("\n" + banner)
print("METHOD 2: Weighted Voting (Filtered Models)")
print(banner)

def evaluate_weighted_ensemble(models_with_acc):
    """Accuracy of an ensemble that sums model logits weighted by accuracy.

    Each model is run over its own loader; its raw outputs are scaled by the
    model's accuracy (normalised so the weights sum to 1) and added. The
    arg-max of the sum is the ensemble prediction. The per-model weights are
    printed.

    Labels are taken from the loaders that were passed in rather than from a
    notebook-level loader, and every loader must yield the same label
    sequence; otherwise logits from different models would be added for
    different samples.

    Args:
        models_with_acc: sequence of ``(name, model, loader, acc)`` tuples.

    Returns:
        Fraction of samples where the weighted prediction equals the label.

    Raises:
        ValueError: if the input is empty, the accuracies sum to zero, or the
            loaders disagree on labels.
    """
    if not models_with_acc:
        raise ValueError("models_with_acc is empty; nothing to ensemble")

    all_logits = []
    weights = []
    true_labels = None

    for name, model, loader, acc in models_with_acc:
        # Run on the device the model lives on; the global is only a fallback.
        first_param = next(model.parameters(), None)
        target = first_param.device if first_param is not None else device

        model_logits, model_labels = [], []
        with torch.no_grad():
            for imgs, labels in loader:
                outputs = model(imgs.to(target))
                model_logits.append(outputs.cpu())
                model_labels.append(labels.cpu())
        model_labels = torch.cat(model_labels)

        # The first loader defines the reference labels; the rest must match.
        if true_labels is None:
            true_labels = model_labels
        elif not torch.equal(model_labels, true_labels):
            raise ValueError(
                f"loader for {name} yields labels in a different order than the first loader"
            )
        all_logits.append(torch.cat(model_logits))
        weights.append(acc)  # Weight by accuracy

    # Normalize weights
    weights = torch.tensor(weights)
    weight_total = weights.sum()
    if weight_total == 0:
        raise ValueError("model accuracies sum to zero; weights are undefined")
    weights = weights / weight_total

    # Weighted sum of logits
    weighted_logits = sum(w * logits for w, logits in zip(weights, all_logits))
    final_preds = weighted_logits.argmax(dim=1)

    correct = (final_preds == true_labels).sum().item()
    accuracy = correct / len(true_labels)

    # Print weights
    print("\nModel weights:")
    for (name, _, _, acc), w in zip(models_with_acc, weights):
        print(f"  {name:20s}: {w.item():.4f} (acc: {acc*100:.2f}%)")

    return accuracy

weighted_acc = evaluate_weighted_ensemble(filtered_models)
print(f"\nWeighted Ensemble Accuracy: {weighted_acc*100:.2f}%")

# Summary comparison
# Look up the best individual model once and reuse it below.
best_record = max(model_accuracies, key=lambda record: record[3])
best_name, best_acc = best_record[0], best_record[3]

print("\n" + "="*60)
print("SUMMARY")
print("="*60)
print(f"Best Individual Model:        {best_name:20s} {best_acc*100:.2f}%")
print(f"All Models Ensemble:          {ensemble_acc*100:.2f}%")
print(f"Filtered Majority Voting:     {ensemble_acc_filtered*100:.2f}%")
print(f"Filtered Weighted Voting:     {weighted_acc*100:.2f}%")
# The "+" format flag prints the real sign, so a regression shows as "-x.xx%"
# instead of the misleading "+-x.xx%" a hard-coded plus would produce.
print(f"\nImprovement over best model:  {(weighted_acc - best_acc)*100:+.2f}%")
print("="*60)


OPTIMIZED ENSEMBLE - Filtering Poor Performers
ResNet34            :  66.49%
ResNet152           :  65.13%
DenseNet121         :  68.22%
EfficientNet-B2     :  69.91%
GoogLeNet           :  54.39%
InceptionV3         :  34.69%
VGG19               :  66.62%
SimpleCNN           :   2.28%
BAM-ResNet50        :  65.87%
CBAM-ResNet50       :  63.43%

Models above 60% threshold: 7/10
✓ ResNet34            :  66.49%
✓ ResNet152           :  65.13%
✓ DenseNet121         :  68.22%
✓ EfficientNet-B2     :  69.91%
✓ VGG19               :  66.62%
✓ BAM-ResNet50        :  65.87%
✓ CBAM-ResNet50       :  63.43%

Excluded models:
✗ GoogLeNet           :  54.39%
✗ InceptionV3         :  34.69%
✗ SimpleCNN           :   2.28%

METHOD 1: Majority Voting (Filtered Models)
Ensemble Accuracy: 72.35%

METHOD 2: Weighted Voting (Filtered Models)

Model weights:
  ResNet34            : 0.1428 (acc: 66.49%)
  ResNet152           : 0.1399 (acc: 65.13%)
  DenseNet121         : 0.1465 (acc: 68.22%)
  EfficientNe