# Parametric Channel Attention Module (PCAM) Experiment
PCAM is inspired by the Convolutional Block Attention Module (CBAM), but enhances it by introducing learnable parameters within the activation functions and adaptively combining average and max pooling outputs in the channel attention mechanism.

PCAM has been built based on the extensive experiment below:  



## 0. Set-up

In [None]:
pip install torchinfo

In [1]:
!pip install torch torchvision torchaudio



In [2]:
# Import all necessary libraries for model building, training,
# evaluation, visualization, and dataset preparation.

import torch                     # PyTorch core
import torch.nn as nn            # PyTorch neural network module
import torch.optim as optim      # PyTorch optimization algorithms
from torchvision import models, transforms
from torchvision.datasets import STL10
from torch.utils.data import DataLoader
from sklearn.metrics import average_precision_score
from sklearn.metrics import average_precision_score
from sklearn.preprocessing import label_binarize
from unittest.mock import patch
from torchvision.models import inception_v3
from torchvision.models import vgg19
from torchinfo import summary
import matplotlib.pyplot as plt
from tqdm import tqdm
import copy
import numpy as np
import pandas as pd
import cv2
import os
from datetime import datetime

# Set computation device to GPU if available; otherwise fall back to the CPU
# so the notebook still runs (slowly) on machines without CUDA instead of
# failing on the first `.to(device)` call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Running on: {device}")

ModuleNotFoundError: No module named 'torch'

**This notebook runs and saves all plots and tables per architecture so that running this model can be split amongst the group**

In [None]:
# Backbone trained by this run of the notebook: 'resnet', 'vgg' or 'inception'.
current_architecture = 'resnet'  # 'vgg' or 'inception'
# Number of training epochs used for every experiment in this run.
epochs = 10

## 1. Dataset Preparation

Data loader for STL-10 with resizing to 224x224 to match pretrained ResNet input expectations:

In [None]:
def get_dataloaders():
    """Build the STL10 train and test loaders shared by all experiments.

    Every image is resized to 224, converted to a tensor and normalised per
    channel with mean 0.5 and std 0.5.

    Returns:
        tuple: ``(trainloader, testloader)``. Both use batch size 64 and two
        worker processes; only the training loader shuffles.
    """
    preprocessing = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        # Mean/std of 0.5 per channel maps ToTensor's [0, 1] range to [-1, 1]
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    def build_loader(split, shuffle):
        # The dataset is downloaded into ./data on first use
        dataset = STL10(root='./data', split=split, download=True, transform=preprocessing)
        return DataLoader(dataset, batch_size=64, shuffle=shuffle, num_workers=2)

    return build_loader('train', True), build_loader('test', False)


## 2. Building Attention Enhanced Convolutional Neural Network (CNN) models


#### Channel Attention Module

---


In [None]:
class ChannelAttention(nn.Module):
    """Channel attention gate: one weight in (0, 1) per input channel.

    The input is squeezed to 1x1 with average and max pooling, both
    descriptors go through the same bottleneck MLP (two 1x1 convolutions),
    and the two results are combined and squashed with a sigmoid.

    Args:
        in_planes: Number of channels of the incoming feature map.
        ratio: Reduction ratio of the bottleneck (hidden width is
            ``in_planes // ratio``, but never less than 1).
        channel_activation_type: ``'parametric_sigmoid'`` divides the logits
            by the learnable ``temperature`` before the sigmoid; any other
            value uses a plain sigmoid.
        channel_pool_weight: ``"yes"`` blends the avg/max branches with a
            learnable weight ``sigmoid(alpha_raw)``; any other value sums
            them, and ``alpha_raw`` is registered as ``None``.
    """

    def __init__(self, in_planes, ratio=16, channel_activation_type='sigmoid', channel_pool_weight="yes"):
        super(ChannelAttention, self).__init__()

        # Adaptive pooling: compress each channel's feature map to a single value
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        # Shared MLP: two 1x1 convs act as a fully connected bottleneck.
        # Guard against a zero-width bottleneck when in_planes < ratio.
        hidden_planes = max(1, in_planes // ratio)
        self.fc1 = nn.Conv2d(in_planes, hidden_planes, kernel_size=1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(hidden_planes, in_planes, kernel_size=1, bias=False)

        # Learnable blend weight: only exists when blending is enabled, so
        # callers can test `alpha_raw is None` (same convention as
        # SpatialAttention) and no unused parameter reaches the optimizer.
        if channel_pool_weight == "yes":
            self.alpha_raw = nn.Parameter(torch.tensor(0.0))
        else:
            self.register_parameter('alpha_raw', None)
        self.temperature = nn.Parameter(torch.tensor(1.0))

        # For condition
        self.channel_activation_type = channel_activation_type
        self.pool_weight = channel_pool_weight

        # Activation functions
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the channel attention map computed from ``x``."""
        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))

        # Blend avg_out and max_out based on learnable alpha
        if self.pool_weight == "yes":
            alpha = torch.sigmoid(self.alpha_raw)
            out = alpha * avg_out + (1 - alpha) * max_out
        else:
            out = avg_out + max_out

        # Optionally rescale the logits with the learnable temperature
        if self.channel_activation_type == "parametric_sigmoid":
            out = self.sigmoid(out / self.temperature)
        else:
            out = self.sigmoid(out)

        return out

#### Spatial Attention Module


#### Activation Functions to Output Values for Each Channel Position
**Sigmoid (default)**: Allows multiple regions to be highlighted. No competition, so the model can attend to several important areas.

**Parametric Sigmoid**: The temperature parameter allows adjusting the steepness of the sigmoid. A higher temperature makes it softer, spreading out the attention, while a lower temperature makes it sharper.

In [None]:
class SpatialAttention(nn.Module):
    """Spatial attention gate: one weight in (0, 1) per spatial location.

    The input is pooled across channels (mean and max), the pooled maps are
    either blended into one map or stacked into two, and a single
    convolution followed by a sigmoid produces the attention mask.

    Args:
        kernel_size: Odd, positive size of the square convolution kernel.
        spatial_activation_type: ``'parametric_sigmoid'`` divides the logits
            by the learnable ``temperature`` before the sigmoid; any other
            value uses a plain sigmoid.
        spatial_pool_weight: ``"yes"`` blends the mean/max maps with a
            learnable weight ``sigmoid(alpha_raw)`` (1-channel conv input);
            any other value concatenates them (2-channel conv input) and
            ``alpha_raw`` is registered as ``None``.

    Raises:
        ValueError: If ``kernel_size`` is even or smaller than 1, because no
            symmetric padding can then keep the mask the same size as the
            input.
    """

    def __init__(self, kernel_size=7, spatial_activation_type='sigmoid', spatial_pool_weight="yes"):
        super(SpatialAttention, self).__init__()

        # The mask is multiplied element-wise with the feature map, so it
        # must keep the input's spatial size: "same" padding for an odd
        # kernel is kernel_size // 2 (7 -> 3, 3 -> 1).
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError(f"kernel_size must be a positive odd integer, got {kernel_size}")
        padding = kernel_size // 2

        # One input channel when avg/max are blended, two when concatenated
        in_channels = 1 if spatial_pool_weight == "yes" else 2
        self.conv1 = nn.Conv2d(in_channels, 1, kernel_size, padding=padding, bias=False)

        # Learnable parameter
        if spatial_pool_weight == "yes":
            self.alpha_raw = nn.Parameter(torch.tensor(0.0))
        else:
            self.register_parameter('alpha_raw', None)
        self.temperature = nn.Parameter(torch.ones(1))

        # For condition
        self.spatial_activation_type = spatial_activation_type
        self.pool_weight = spatial_pool_weight

        # Activation functions
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the single-channel spatial attention mask computed from ``x``."""
        # Channel-wise pooling to summarize across channels
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)

        # Blend avg_out and max_out based on learnable alpha
        if self.pool_weight == "yes":
            alpha = torch.sigmoid(self.alpha_raw)
            out = alpha * avg_out + (1 - alpha) * max_out
        else:
            out = torch.cat([avg_out, max_out], dim=1)

        # Use convolution layer to generate the spatial attention mask
        out = self.conv1(out)

        # Apply different activation functions
        if self.spatial_activation_type == "parametric_sigmoid":
            out = self.sigmoid(out / self.temperature)
        else:
            out = self.sigmoid(out)

        return out


### Adding CBAM to CNN architectures

This section wraps VGG19, ResNet18, and InceptionV3 with CBAM at early, middle, or late layer locations.

In [None]:
class CBAM(nn.Module):
    """Channel attention followed by spatial attention, applied multiplicatively.

    Args:
        planes: Channel count of the feature map being refined.
        ratio: Bottleneck reduction ratio forwarded to ChannelAttention.
        kernel_size: Convolution kernel size forwarded to SpatialAttention.
        channel_activation_type, channel_pool_weight: Forwarded to
            ChannelAttention.
        spatial_activation_type, spatial_pool_weight: Forwarded to
            SpatialAttention.
    """

    def __init__(self, planes, ratio=16, kernel_size=7,
                 channel_activation_type='sigmoid', channel_pool_weight="yes",
                 spatial_activation_type='sigmoid', spatial_pool_weight="yes"):
        super().__init__()
        channel_options = {
            'channel_activation_type': channel_activation_type,
            'channel_pool_weight': channel_pool_weight,
        }
        spatial_options = {
            'spatial_activation_type': spatial_activation_type,
            'spatial_pool_weight': spatial_pool_weight,
        }
        self.ca = ChannelAttention(planes, ratio, **channel_options)
        self.sa = SpatialAttention(kernel_size, **spatial_options)

    def forward(self, x):
        """Re-weight ``x`` per channel, then per spatial position."""
        channel_refined = x * self.ca(x)
        # The spatial mask is computed from the channel-refined features
        return channel_refined * self.sa(channel_refined)


In [None]:
# Helper function that inserts CBAM into a Sequential model at a specified index

def insert_cbam_sequential(module, index, cbam_module):
    """Return a new ``nn.Sequential`` with ``cbam_module`` placed at ``index``.

    Args:
        module: Container whose direct children are the existing layers.
        index: Position of the new layer, with ``list.insert`` semantics.
        cbam_module: Layer to insert.

    Returns:
        nn.Sequential: Built from the original children plus the new layer;
        ``module`` itself is not modified.
    """
    children = list(module.children())
    # Slicing on both sides of `index` mirrors list.insert, including
    # negative and out-of-range indices
    return nn.Sequential(*children[:index], cbam_module, *children[index:])

## 3. Model Building


---



### ResNet with CBAM:

- Custom ResNet18 wrapper that conditionally adds CBAM at different depths

In [None]:
class ResNet18_CBAM(nn.Module):
    """Pretrained ResNet18 with an optional CBAM applied to one stage's output.

    The CBAM module is stored as the ``cbam`` attribute of the first block of
    the chosen stage, but ``forward`` applies it to the output of the whole
    stage. ``cbam_layer`` points at the block that holds it.

    Args:
        attention_position: ``"early"`` (after layer1, 64 channels),
            ``"middle"`` (after layer2, 128 channels) or ``"late"`` (after
            layer4, 512 channels). Any other value adds no attention.
        num_classes: Output size of the replacement classification head.
        spatial_activation_type, channel_activation_type,
        channel_pool_weight, spatial_pool_weight: Forwarded to CBAM.
    """

    def __init__(self, attention_position="late", num_classes=10, spatial_activation_type='sigmoid', channel_activation_type='sigmoid', channel_pool_weight="yes", spatial_pool_weight="yes"):
        super().__init__()
        backbone = models.resnet18(pretrained=True)

        # Reuse the pretrained feature extractor; only the head is new
        self.stem = nn.Sequential(backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool)
        self.layer1 = backbone.layer1  # 64 output channels
        self.layer2 = backbone.layer2  # 128 output channels
        self.layer3 = backbone.layer3  # 256 output channels
        self.layer4 = backbone.layer4  # 512 output channels
        self.avgpool = backbone.avgpool
        self.fc = nn.Linear(512, num_classes)

        # (position name, stage, channel count of that stage's output)
        attachment_points = (
            ("early", self.layer1, 64),
            ("middle", self.layer2, 128),
            ("late", self.layer4, 512),
        )
        for position_name, stage, channels in attachment_points:
            if attention_position == position_name:
                stage[0].cbam = CBAM(
                    channels,
                    spatial_activation_type=spatial_activation_type,
                    channel_activation_type=channel_activation_type,
                    channel_pool_weight=channel_pool_weight,
                    spatial_pool_weight=spatial_pool_weight,
                )
                self.cbam_layer = stage[0]
                break

    def forward(self, x):
        """Run the backbone, applying CBAM after whichever stage carries one."""
        x = self.stem(x)
        # layer3 is never an attachment point, so it is not probed for a CBAM
        stages = (
            (self.layer1, True),
            (self.layer2, True),
            (self.layer3, False),
            (self.layer4, True),
        )
        for stage, may_carry_cbam in stages:
            x = stage(x)
            if may_carry_cbam and hasattr(stage[0], 'cbam'):
                x = stage[0].cbam(x)
        pooled = self.avgpool(x)
        return self.fc(torch.flatten(pooled, 1))


### InceptionV3 with CBAM:

In [None]:
class InceptionV3_CBAM(nn.Module):
    """Pretrained InceptionV3 feature extractor with one CBAM block inserted.

    The CBAM module is stored as the ``cbam`` attribute of the chosen Mixed
    block and applied to that block's output in ``forward``. Only the main
    branch of InceptionV3 is reused; this wrapper has no auxiliary
    classifier.

    Args:
        attention_position: ``"early"`` (after Mixed_5b), ``"middle"``
            (after Mixed_5d) or ``"late"`` (after Mixed_6a).
        num_classes: Output size of the replacement classification head.
        spatial_activation_type, channel_activation_type,
        channel_pool_weight, spatial_pool_weight: Forwarded to CBAM.

    Raises:
        ValueError: If ``attention_position`` is not one of the three
            supported values.
    """

    def __init__(self, attention_position="late", num_classes=10,
                 spatial_activation_type='sigmoid', channel_activation_type='sigmoid',
                 channel_pool_weight="yes", spatial_pool_weight="yes"):
        super(InceptionV3_CBAM, self).__init__()
        inception = inception_v3(pretrained=True)

        self.stem = nn.Sequential(
            inception.Conv2d_1a_3x3,
            inception.Conv2d_2a_3x3,
            inception.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2),
            inception.Conv2d_3b_1x1,
            inception.Conv2d_4a_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Reuse the pretrained inception blocks so forward() can interleave CBAM
        self.Mixed_5b = inception.Mixed_5b  # Input: 192, output: 256
        self.Mixed_5c = inception.Mixed_5c
        self.Mixed_5d = inception.Mixed_5d  # Output: 288
        self.Mixed_6a = inception.Mixed_6a  # Output: 768
        self.Mixed_6b = inception.Mixed_6b
        self.Mixed_6c = inception.Mixed_6c
        self.Mixed_6d = inception.Mixed_6d
        self.Mixed_6e = inception.Mixed_6e
        self.Mixed_7a = inception.Mixed_7a
        self.Mixed_7b = inception.Mixed_7b
        self.Mixed_7c = inception.Mixed_7c

        # Insert CBAM. `planes` must equal the OUTPUT channel count of the
        # block it is attached to, because forward() applies it afterwards.
        if attention_position == "early":
            # Mixed_5b takes 192 channels but emits 256 (64 + 64 + 96 + 32)
            self.Mixed_5b.cbam = CBAM(
                planes=256,
                spatial_activation_type=spatial_activation_type,
                channel_activation_type=channel_activation_type,
                channel_pool_weight=channel_pool_weight,
                spatial_pool_weight=spatial_pool_weight
            )
        elif attention_position == "middle":
            self.Mixed_5d.cbam = CBAM(
                planes=288,
                spatial_activation_type=spatial_activation_type,
                channel_activation_type=channel_activation_type,
                channel_pool_weight=channel_pool_weight,
                spatial_pool_weight=spatial_pool_weight
            )
        elif attention_position == "late":
            self.Mixed_6a.cbam = CBAM(
                planes=768,
                spatial_activation_type=spatial_activation_type,
                channel_activation_type=channel_activation_type,
                channel_pool_weight=channel_pool_weight,
                spatial_pool_weight=spatial_pool_weight
            )
        else:
            raise ValueError("attention_position must be 'early', 'middle', or 'late'")

        # Adaptive pooling keeps the head independent of the input resolution
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout()
        self.fc = nn.Linear(2048, num_classes)

    def forward(self, x):
        """Run the backbone, applying CBAM after whichever block carries one."""
        x = self.stem(x)

        # Manually forward through inception blocks with CBAM checks
        x = self.Mixed_5b(x)
        if hasattr(self.Mixed_5b, 'cbam'):
            x = self.Mixed_5b.cbam(x)

        x = self.Mixed_5c(x)
        x = self.Mixed_5d(x)
        if hasattr(self.Mixed_5d, 'cbam'):
            x = self.Mixed_5d.cbam(x)

        x = self.Mixed_6a(x)
        if hasattr(self.Mixed_6a, 'cbam'):
            x = self.Mixed_6a.cbam(x)

        x = self.Mixed_6b(x)
        x = self.Mixed_6c(x)
        x = self.Mixed_6d(x)
        x = self.Mixed_6e(x)
        x = self.Mixed_7a(x)
        x = self.Mixed_7b(x)
        x = self.Mixed_7c(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)
        return x

### VGG-19 with CBAM:

In [None]:
class VGG19_CBAM(nn.Module):
    """Pretrained VGG19 feature stack with one CBAM block spliced in.

    The convolutional features come from the pretrained network; the
    classifier is rebuilt from scratch (freshly initialised) with
    ``num_classes`` outputs.

    Args:
        attention_position: ``"early"``, ``"middle"`` or ``"late"``; selects
            the index in ``vgg.features`` before which CBAM is inserted
            (5, 20 or 30 respectively).
        num_classes: Output size of the classifier.
        spatial_activation_type, channel_activation_type,
        channel_pool_weight, spatial_pool_weight: Forwarded to CBAM.

    Raises:
        ValueError: If ``attention_position`` is not one of the three
            supported values.
    """

    def __init__(self, attention_position="late", num_classes=10,
                 spatial_activation_type='sigmoid', channel_activation_type='sigmoid',
                 channel_pool_weight="yes", spatial_pool_weight="yes"):
        super(VGG19_CBAM, self).__init__()

        # Validate first so a typo fails with a clear message (and before
        # the pretrained weights are loaded) instead of an unbound variable.
        insert_points = {"early": 5, "middle": 20, "late": 30}
        if attention_position not in insert_points:
            raise ValueError("attention_position must be 'early', 'middle', or 'late'")
        insert_idx = insert_points[attention_position]

        # Load pretrained VGG19 and extract features
        vgg = vgg19(pretrained=True)
        features = list(vgg.features.children())

        # The last Conv2d before the insert point fixes the channel count
        # of the feature map CBAM will receive
        planes = next(
            layer.out_channels
            for layer in reversed(features[:insert_idx])
            if isinstance(layer, nn.Conv2d)
        )

        # Insert CBAM module
        cbam = CBAM(planes,
                    spatial_activation_type=spatial_activation_type,
                    channel_activation_type=channel_activation_type,
                    channel_pool_weight=channel_pool_weight,
                    spatial_pool_weight=spatial_pool_weight)

        self.features = nn.Sequential(*features[:insert_idx], cbam, *features[insert_idx:])

        # Classifier
        self.avgpool = vgg.avgpool
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096), nn.ReLU(True), nn.Dropout(),
            nn.Linear(4096, 4096), nn.ReLU(True), nn.Dropout(),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

## 4. Model Summary


---


### ResNet Model Summary

In [None]:
# Build the late-attention ResNet18 variant and print its torchinfo summary
# for a single 3x224x224 input.
model = ResNet18_CBAM(attention_position="late", num_classes=10)
summary(model, input_size=(1, 3, 224, 224))

### InceptionV3 Model Summary

In [None]:
# Build the late-attention InceptionV3 variant and print its torchinfo summary
# for a single 3x299x299 input.
model = InceptionV3_CBAM(attention_position="late", num_classes=10)
summary(model, input_size=(1, 3, 299, 299))

### VGG-19 Model Summary

In [None]:
# Build the late-attention VGG19 variant and print its torchinfo summary
# for a single 3x224x224 input.
model = VGG19_CBAM(attention_position="late", num_classes=10)
summary(model, input_size=(1, 3, 224, 224))

Function that takes the architecture and the position where attention is to be added. `num_classes` is always 10 because that is how many categories STL-10 has.

In [None]:
def get_attention_model(base='resnet', position='middle', num_classes=10, spatial_activation_type = 'sigmoid', channel_activation_type='sigmoid', channel_pool_weight="yes", spatial_pool_weight="yes"):
    """Build the CBAM-enhanced model for the requested backbone.

    Args:
        base: ``'resnet'``, ``'vgg'`` or ``'inception'``.
        position: Attention position forwarded to the model as
            ``attention_position``.
        num_classes: Number of output classes.
        spatial_activation_type, channel_activation_type,
        channel_pool_weight, spatial_pool_weight: Forwarded unchanged to the
            model constructor.

    Returns:
        The constructed ``ResNet18_CBAM``, ``VGG19_CBAM`` or
        ``InceptionV3_CBAM`` instance.

    Raises:
        ValueError: If ``base`` is not a supported backbone name (previously
            this silently returned ``None``).
    """
    # Resolve the class lazily so an unknown name fails before anything is built
    if base == 'resnet':
        model_cls = ResNet18_CBAM
    elif base == 'vgg':
        model_cls = VGG19_CBAM
    elif base == 'inception':
        model_cls = InceptionV3_CBAM
    else:
        raise ValueError(f"Unknown base architecture {base!r}; expected 'resnet', 'vgg' or 'inception'")

    return model_cls(
        attention_position=position,
        num_classes=num_classes,
        spatial_activation_type=spatial_activation_type,
        channel_activation_type=channel_activation_type,
        channel_pool_weight=channel_pool_weight,
        spatial_pool_weight=spatial_pool_weight,
    )

## 5. Training and Evaluation

Includes Top-1, Top-5, Average Precision and gradient flow plot & logging alpha values

In [None]:
def train_model(model, trainloader, testloader, num_classes=10, epochs=epochs, lr=0.0001):
    """Train ``model`` with Adam and cross-entropy, keeping the best weights.

    After every epoch the model is evaluated on ``testloader``; the weights
    with the highest top-1 accuracy are restored before returning. If the
    model contains a CBAM block (in any of the wrappers, not only the ResNet
    one), its learned channel/spatial blend weights are logged per epoch.
    Two figures are drawn at the end: the alpha curves (if any were logged)
    and the gradient norm per epoch.

    Args:
        model: Network to train; moved to ``device``.
        trainloader: Iterable of ``(inputs, labels)`` training batches.
        testloader: Loader passed to ``evaluate_model`` after each epoch.
        num_classes: Forwarded to ``evaluate_model``.
        epochs: Number of passes over ``trainloader``. The default is the
            value the global ``epochs`` had when this function was defined.
        lr: Adam learning rate.

    Returns:
        tuple: ``(model, history)`` where ``history`` has the keys ``'top1'``
        (best top-1 accuracy in percent), ``'grad_norms'``, ``'alpha_log'``
        and ``'spatial_alpha_log'`` (one entry per epoch where available).
    """
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    grad_norms = []
    alpha_log = []
    spatial_alpha_log = []

    # Located once: the module structure does not change during training
    cbam = _find_cbam(model)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        total_norm = 0.0

        for inputs, labels in tqdm(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            # Sum of the per-parameter gradient L2 norms for this batch
            norm = sum(p.grad.data.norm(2).item() for p in model.parameters() if p.grad is not None)
            total_norm += norm
            optimizer.step()
            running_loss += loss.item()

        # Average the batch-level norm over the epoch
        grad_norms.append(total_norm / len(trainloader))

        # Evaluation (the printed loss is the sum of the batch losses)
        top1, top5, ap = evaluate_model(model, testloader, num_classes)
        print(f"Epoch {epoch+1}: Loss={running_loss:.4f}, GradNorm={grad_norms[-1]:.4f}, Top1={top1:.2f}%, Top5={top5:.2f}%, AP={ap:.4f}")

        # Track alphas if CBAM present
        if cbam is not None:
            alpha_val = _alpha_value(cbam.ca)
            spatial_val = _alpha_value(cbam.sa)

            if alpha_val is not None:
                alpha_log.append(alpha_val)
                print(f"Learned channel alpha at epoch {epoch + 1}: {alpha_val:.4f}")
            if spatial_val is not None:
                spatial_alpha_log.append(spatial_val)
                print(f"Learned spatial alpha at epoch {epoch + 1}: {spatial_val:.4f}")

        if top1 > best_acc:
            best_acc = top1
            best_model_wts = copy.deepcopy(model.state_dict())

    model.load_state_dict(best_model_wts)

    # Plot channel & spatial alpha (if any)
    if alpha_log or spatial_alpha_log:
        plt.figure()
        if alpha_log:
            plt.plot(range(1, len(alpha_log)+1), alpha_log, label='Channel alpha')
        if spatial_alpha_log:
            plt.plot(range(1, len(spatial_alpha_log)+1), spatial_alpha_log, label='Spatial alpha')
        plt.title("Learned Alpha Over Epochs")
        plt.xlabel("Epoch")
        plt.ylabel("Alpha Value")
        plt.legend()
        plt.grid(True)
        plt.show()

    # Plot gradient norm
    plt.figure()
    plt.plot(range(1, epochs+1), grad_norms, marker='o', label='Gradient Norm')
    plt.title("Gradient Norm per Epoch")
    plt.xlabel("Epoch")
    plt.ylabel("Gradient L2 Norm")
    plt.grid(True)
    plt.legend()
    plt.show()

    return model, {
        'top1': best_acc,
        'grad_norms': grad_norms,
        'alpha_log': alpha_log,
        'spatial_alpha_log': spatial_alpha_log
    }


def _find_cbam(model):
    """Return the model's CBAM block (a module with ``ca`` and ``sa``), or None."""
    # Preferred: the explicit handle set by the ResNet wrapper
    explicit = getattr(getattr(model, 'cbam_layer', None), 'cbam', None)
    if explicit is not None:
        return explicit
    # Fallback for wrappers without that handle: first submodule exposing
    # both attention sub-blocks
    for module in model.modules():
        if hasattr(module, 'ca') and hasattr(module, 'sa'):
            return module
    return None


def _alpha_value(attention_module):
    """Return ``sigmoid(alpha_raw)`` as a float, or None when there is no alpha."""
    alpha_raw = getattr(attention_module, 'alpha_raw', None)
    if alpha_raw is None:
        return None
    return torch.sigmoid(alpha_raw).item()



In [None]:
def evaluate_model(model, testloader, num_classes=10):
    """Compute top-1 accuracy, top-5 accuracy and macro average precision.

    Args:
        model: Network to evaluate; switched to eval mode. It must already
            be on ``device``.
        testloader: Iterable of ``(inputs, labels)`` batches.
        num_classes: Kept for interface compatibility; not used here.

    Returns:
        tuple: ``(top1_acc, top5_acc, ap_score)`` with the accuracies in
        percent and ``ap_score`` as returned by ``compute_average_precision``.

    Raises:
        ValueError: If ``testloader`` yields no samples.
    """
    model.eval()
    correct = 0
    top5_correct = 0
    total = 0
    all_probs = []
    all_targets = []

    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            probs = torch.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs, 1)

            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            # Clamp k so models with fewer than 5 classes still work
            k = min(5, outputs.size(1))
            topk_idx = torch.topk(outputs, k, dim=1)[1]
            # A sample is a hit when its label appears among its k best
            # classes; compared for the whole batch at once
            top5_correct += (topk_idx == labels.unsqueeze(1)).any(dim=1).sum().item()
            all_probs.append(probs.cpu())
            all_targets.append(labels.cpu())

    if total == 0:
        raise ValueError("testloader yielded no samples; cannot compute metrics")

    top1_acc = correct / total * 100
    top5_acc = top5_correct / total * 100
    ap_score = compute_average_precision(all_targets, all_probs)
    return top1_acc, top5_acc, ap_score

In [None]:
def compute_average_precision(all_targets, all_probs, num_classes=None, verbose=False):
    """Compute the macro-averaged average precision over all classes.

    Args:
        all_targets: List of 1-D label tensors, one per batch.
        all_probs: List of ``(batch, classes)`` score tensors, one per batch.
        num_classes: Number of classes. Defaults to the number of score
            columns, so the function is no longer tied to 10 classes.
        verbose: When True, print the shapes and the first sample. Off by
            default because this runs after every epoch.

    Returns:
        The macro average precision from ``average_precision_score``.

    Raises:
        ValueError: If the one-hot targets and the scores differ in shape.
    """
    # Concatenate tensors
    y_true = torch.cat(all_targets).cpu().numpy()
    y_score = torch.cat(all_probs).cpu().numpy()

    if num_classes is None:
        num_classes = y_score.shape[1]

    # Binarize targets (e.g., [3] → [0,0,0,1,0,0,...])
    y_true_bin = label_binarize(y_true, classes=list(range(num_classes)))
    if num_classes == 2 and y_true_bin.shape[1] == 1:
        # label_binarize returns a single column for two classes; expand it
        # to one column per class so it lines up with the scores
        y_true_bin = np.hstack([1 - y_true_bin, y_true_bin])

    # Check shape match (explicit raise: asserts are stripped under -O)
    if y_true_bin.shape != y_score.shape:
        raise ValueError(f"Shape mismatch: {y_true_bin.shape} vs {y_score.shape}")

    # Compute macro-average precision
    ap = average_precision_score(y_true_bin, y_score, average='macro')

    if verbose:
        print("y_true_bin shape:", y_true_bin.shape)  # Should be [N, num_classes]
        print("y_score shape:", y_score.shape)        # Should be [N, num_classes]
        print("y_score sample:", y_score[0])
        print("y_true one-hot:", y_true_bin[0])

    return ap


### Experimenting with different combinations

This cell runs experiments for all **9 combinations** :

* Attention used : "Yes", "No"
* Spatial Activations = "sigmoid", "parametric_sigmoid"
* Channel Activation = "sigmoid"
* Pool Weight Options in Channel = "no", "yes"
* Pool Weight Options in spatial = "no", "yes"

2 * 2 * 2 + 1 = 9

In [None]:
# Experiment grid: one attention position plus the vanilla baseline (None),
# crossed with spatial activation and the two pooling-weight switches.
positions = ['late', None]
spatial_activations = ['sigmoid', 'parametric_sigmoid']
pool_weight_options = ['no','yes']
results = []
num_classes = 10

trainloader, testloader = get_dataloaders()

for pos in positions:
    # Handle vanilla model outside inner parameter loops
    if pos is None:
        print(f"\nRunning: {current_architecture.upper()} | Vanilla")

        # Load vanilla model and swap the final layer for a num_classes head
        if current_architecture == 'vgg':
            model = models.vgg19(pretrained=True)
            model.classifier[-1] = nn.Linear(4096, num_classes)
        elif current_architecture == 'resnet':
            model = models.resnet18(pretrained=True)
            model.fc = nn.Linear(512, num_classes)
        elif current_architecture == 'inception':
            model = models.inception_v3(pretrained=True)
            model.fc = nn.Linear(2048, num_classes)
            # In training mode torchvision's InceptionV3 returns
            # (logits, aux_logits), which CrossEntropyLoss cannot consume.
            # Drop the auxiliary head so forward() returns a plain tensor,
            # matching InceptionV3_CBAM, which has no aux branch either.
            model.aux_logits = False
            model.AuxLogits = None
        else:
            # Without this, `model` would be undefined or left over from
            # the previous experiment
            raise ValueError(f"Unknown architecture {current_architecture!r}; expected 'resnet', 'vgg' or 'inception'")

        model = model.to(device)
        trained_model, _ = train_model(model, trainloader, testloader, num_classes=num_classes, epochs=epochs)
        top1, top5, ap = evaluate_model(trained_model, testloader, num_classes)

        results.append({
            'architecture': current_architecture,
            'attention': 'none',
            'spatial_act': 'n/a',
            'channel_act': 'n/a',
            'channel_pool': 'n/a',
            'spatial_pool': 'n/a',
            'top1': top1,
            'top5': top5,
            'ap': ap
        })
        continue  # Skip remaining parameters for this position

    # Process attention models with all parameter combinations
    for spat_act in spatial_activations:
        for chan_pool in pool_weight_options:
            for spat_pool in pool_weight_options:
                print(f"\nRunning: {current_architecture.upper()} | Attention: {pos}")
                print(f"Params: S-Act={spat_act}, C-Pool={chan_pool}, S-Pool={spat_pool}")

                model = get_attention_model(
                    base=current_architecture,
                    position=pos,
                    num_classes=num_classes,
                    spatial_activation_type=spat_act,
                    channel_pool_weight=chan_pool,
                    spatial_pool_weight=spat_pool
                )
                model = model.to(device)

                if chan_pool == "no" and spat_pool == "no":
                    # No learnable blend weights in this configuration:
                    # suppress the figures train_model would display
                    with patch("matplotlib.pyplot.show"):
                        trained_model, _ = train_model(model, trainloader, testloader, num_classes=num_classes, epochs=epochs)
                else:
                    trained_model, _ = train_model(model, trainloader, testloader, num_classes=num_classes, epochs=epochs)

                # Re-evaluate the restored best weights for the results table
                top1, top5, ap = evaluate_model(trained_model, testloader, num_classes)

                results.append({
                    'architecture': current_architecture,
                    'attention': pos,
                    'spatial_act': spat_act,
                    'channel_act': 'sigmoid',
                    'channel_pool': chan_pool,
                    'spatial_pool': spat_pool,
                    'top1': top1,
                    'top5': top5,
                    'ap': ap
                })

# Gather every run into one table and show it in the notebook
df_results = pd.DataFrame(results)
print("\nAll experiments complete. Summary:")
display(df_results)

# Emit the same table as LaTeX, captioned and labelled per architecture
latex_table = df_results.to_latex(
    index=False,
    caption=f"{current_architecture.upper()} CBAM Results",
    label=f"tab:{current_architecture}_results",
)
print(latex_table)
