In [2]:
# All notebook dependencies are imported up front, before any other statement.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Report the runtime environment so the recorded outputs can be interpreted.
print(f"PyTorch version: {torch.__version__}")
print(f"Is CUDA available: {torch.cuda.is_available()}")



PyTorch version: 2.3.1+cu121
Is CUDA available: False


In [3]:
class ACMILAttention(nn.Module):
    def __init__(self, input_dim, latent_dim):
        """
        Attention layer that scores the instances of a bag.

        Each instance is projected to a latent space, passed through tanh,
        reduced to a single scalar score, and the scores are normalized with
        a softmax over the instances of the bag.

        Args:
            input_dim (int): Dimension of input features.
            latent_dim (int): Dimension of the latent space for attention.
        """
        super(ACMILAttention, self).__init__()
        self.fc1 = nn.Linear(input_dim, latent_dim)  # Project to latent space
        self.tanh = nn.Tanh()
        self.fc2 = nn.Linear(latent_dim, 1)  # One scalar score per instance

    def forward(self, x):
        """
        Compute normalized attention weights for every instance.

        Args:
            x (Tensor): Instance features of shape (N, D) for a single bag,
                        or (..., N, D) for a batch of equally sized bags,
                        where
                        N = number of instances in the bag
                        D = feature dimension (must equal ``input_dim``).
        Returns:
            attention_weights (Tensor): Weights of shape (N, 1), or
                (..., N, 1) for batched input. They sum to 1 over the
                instance axis of each bag.
        """
        scores = self.fc2(self.tanh(self.fc1(x)))
        # The instance axis is the second-to-last one of the score tensor:
        # (N, 1) or (..., N, 1). Normalizing over a fixed dim=0 would mix
        # different bags together as soon as a leading batch axis is present.
        # A 1-D input (a single feature vector) yields a 1-element score and
        # keeps the previous dim=0 behaviour.
        instance_dim = -2 if scores.dim() >= 2 else 0
        return F.softmax(scores, dim=instance_dim)


In [4]:
class ACMIL(nn.Module):
    def __init__(self, input_dim, latent_dim, num_classes=2):
        """
        Bag-level classifier built on attention pooling.

        An ``ACMILAttention`` layer weights the instances of a bag; the
        weighted sum of the instance features is then fed to a linear
        classifier.

        Args:
            input_dim (int): Dimension of instance-level input features.
            latent_dim (int): Dimension of latent space for attention.
            num_classes (int): Number of output classes (default=2).
        """
        super().__init__()
        # Scores the instances of the bag.
        self.attention = ACMILAttention(input_dim, latent_dim)
        # Maps the pooled bag vector to class logits.
        self.classifier = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        """
        Pool a bag with attention and classify the pooled representation.

        Args:
            x (Tensor): Input bag features of shape (N, D), where
                        N = number of instances in the bag
                        D = feature dimension.
        Returns:
            tuple: ``(bag_logits, attention_weights)`` where ``bag_logits``
            is the classifier output for the pooled bag vector and
            ``attention_weights`` is whatever ``self.attention`` returned
            for ``x``.
        """
        weights = self.attention(x)
        # Weights broadcast across the feature axis; summing over axis 0
        # collapses the instances into a single bag vector.
        pooled = (weights * x).sum(dim=0)
        return self.classifier(pooled), weights


In [5]:
# Parameters
input_dim = 128  # Example input feature dimension
latent_dim = 64  # Latent space for attention
num_classes = 2  # Binary classification
num_instances = 10  # Number of instances in the simulated bag
seed = 0  # Fixed seed so Restart & Run All reproduces the same numbers

# Seed before anything random happens: both the model's weight
# initialization and the simulated bag draw from torch's global RNG.
torch.manual_seed(seed)

# Create model
model = ACMIL(input_dim, latent_dim, num_classes)

# Simulate a bag of `num_instances` instances, each of dimension `input_dim`
bag = torch.rand(num_instances, input_dim)

# Forward pass
bag_logits, attention_weights = model(bag)

# Sanity checks: one logit per class, one weight per instance, and the
# attention weights form a probability distribution over the bag.
assert bag_logits.shape == (num_classes,)
assert attention_weights.shape == (num_instances, 1)
assert torch.isclose(attention_weights.sum(), torch.tensor(1.0), atol=1e-6)

# Print results
print("Bag logits:", bag_logits)
print("Attention weights:", attention_weights)


Bag logits: tensor([-0.2565, -0.6902], grad_fn=<ViewBackward0>)
Attention weights: tensor([[0.1055],
        [0.0852],
        [0.1133],
        [0.1001],
        [0.0878],
        [0.1121],
        [0.1016],
        [0.0917],
        [0.0956],
        [0.1071]], grad_fn=<SoftmaxBackward0>)
