In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score

In [3]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

# --- Data ---
# Per-channel mean / standard deviation handed to Normalize below (one value per
# input channel). Presumably the CIFAR-10 training-set statistics — TODO confirm.
mean = (0.4914, 0.4822, 0.4465)
std  = (0.2023, 0.1994, 0.2010)

# Training pipeline: random augmentation (padded 32x32 crop, horizontal flip)
# followed by tensor conversion and per-channel normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Test pipeline: no augmentation, only tensor conversion and the same normalization.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Training split, downloaded into ./data when missing; batches of 128 are reshuffled
# every epoch and loaded by 2 worker processes.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

# Test split; batches of 100 in a fixed order (shuffle=False).
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2) 

Using device: cuda
Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Lower the threshold to reduce aggressive zeroing
class ThresholdReLU(nn.Module):
    """ReLU variant that zeroes every activation not strictly above ``threshold``.

    Elements with ``x > threshold`` pass through unchanged; all others
    (including NaN, for which the comparison is false) become exactly 0.

    Args:
        threshold: Cut-off value; defaults to 0.001.
    """

    def __init__(self, threshold=0.001):
        super(ThresholdReLU, self).__init__()
        self.threshold = threshold

    def forward(self, x):
        """Return ``x`` with every element ``<= threshold`` replaced by 0."""
        # Build the fill value from ``x`` so it always shares x's dtype and device.
        # A hard-coded float32 tensor mismatches half/double inputs and raises on
        # PyTorch versions whose torch.where does not do type promotion.
        zero = x.new_zeros(())
        return torch.where(x > self.threshold, x, zero)

    def extra_repr(self):
        """Show the threshold when the module (or a model containing it) is printed."""
        return f"threshold={self.threshold}"

# Quantize normalized inputs (assuming they roughly lie in [-2.5, 2.5])
def quantize_input(x, levels=16, min_val=-2.5, max_val=2.5):
    """Uniformly quantize ``x`` onto ``levels`` evenly spaced values in [min_val, max_val].

    Values are first clamped to ``[min_val, max_val]``, then snapped to the nearest
    of ``levels`` grid points (both end points included).

    Note: ``torch.round`` has zero gradient almost everywhere, so gradients do not
    flow through this function to ``x``.

    Args:
        x: Input tensor.
        levels: Number of quantization levels; must be at least 2.
        min_val: Lower end of the quantization range.
        max_val: Upper end of the quantization range; must exceed ``min_val``.

    Returns:
        Tensor of the same shape as ``x`` holding the quantized values.

    Raises:
        ValueError: If ``levels < 2`` or ``max_val <= min_val``. Either case would
            otherwise divide by zero and silently fill the result with NaN/inf.
    """
    if levels < 2:
        raise ValueError(f"levels must be >= 2, got {levels}")
    if not max_val > min_val:
        raise ValueError(f"max_val ({max_val}) must be greater than min_val ({min_val})")

    x = torch.clamp(x, min_val, max_val)
    x_norm = (x - min_val) / (max_val - min_val)   # map to [0,1]
    x_quant = torch.round(x_norm * (levels - 1)) / (levels - 1)  # snap to the grid
    x_new = x_quant * (max_val - min_val) + min_val  # map back to the original range
    return x_new

# Normalized sparsity loss (L1 norm divided by total number of elements)
def sparsity_loss(activations, weight=1e-6):
    """Weighted mean absolute activation (L1 norm divided by element count).

    Args:
        activations: Iterable of tensors (a list or a one-shot generator).
        weight: Scale factor applied to the mean absolute value.

    Returns:
        0-dim tensor ``weight * sum(|a|) / number_of_elements``. When there are no
        elements at all (empty iterable or only empty tensors) a zero tensor is
        returned instead of dividing by zero.
    """
    # Materialise once: the data is traversed twice below, which would silently
    # yield a zero loss if a generator were exhausted by the first pass.
    activations = list(activations)
    total_elements = sum(act.numel() for act in activations)
    if total_elements == 0:
        return activations[0].new_zeros(()) if activations else torch.zeros(())

    # abs().sum() is the flattened L1 norm; it replaces the deprecated torch.norm(act, 1).
    l1_total = sum(act.abs().sum() for act in activations)
    return weight * l1_total / total_elements

In [5]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm stages and a skip path.

    The skip path is an empty ``nn.Sequential`` (identity) unless the block changes
    resolution or channel count, in which case a 1x1 conv + batch norm projects it.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Stage 1 carries the stride, i.e. any spatial down-sampling.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = ThresholdReLU()

        # Stage 2 keeps both resolution and width.
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        shape_changes = stride != 1 or in_channels != out_channels
        if shape_changes:
            # Project the input so it can be added to the main branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``(block_output, first_stage_activation)``.

        The second element is the thresholded output of the first conv stage; it is
        exposed so callers can apply a sparsity penalty to it.
        """
        hidden = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(hidden))
        residual += self.shortcut(x)
        return self.relu(residual), hidden

class ResNet20(nn.Module):
    """Three-stage residual network (3 blocks per stage, widths 16/32/64).

    ``forward`` returns the class logits together with the list of intermediate
    activations reported by every residual block.
    """

    def __init__(self, block=BasicBlock, num_classes=10):
        super().__init__()
        # Running channel count; _make_layer reads and updates it.
        self.in_channels = 16

        # Stem: 3-channel input -> 16 feature maps at full resolution.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = ThresholdReLU()

        # Stages 2 and 3 open with a stride-2 block.
        self.layer1 = self._make_layer(block, 16, 3, stride=1)
        self.layer2 = self._make_layer(block, 32, 3, stride=2)
        self.layer3 = self._make_layer(block, 64, 3, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use stride 1."""
        unit_strides = [stride] + [1] * (blocks - 1)
        # Only the first block sees the incoming width; later ones already run at out_channels.
        unit_inputs = [self.in_channels] + [out_channels] * (len(unit_strides) - 1)
        units = [
            block(width_in, out_channels, unit_stride)
            for width_in, unit_stride in zip(unit_inputs, unit_strides)
        ]
        self.in_channels = out_channels
        return nn.Sequential(*units)

    def forward(self, x):
        """Return ``(logits, activations)`` for the input batch ``x``."""
        out = self.relu(self.bn1(self.conv1(x)))

        # Blocks return a tuple, so each stage is iterated block by block instead of
        # being called as a whole nn.Sequential.
        activations = []
        for stage in (self.layer1, self.layer2, self.layer3):
            for block in stage:
                out, act = block(out)
                activations.append(act)

        pooled = self.avgpool(out)
        logits = self.fc(torch.flatten(pooled, 1))
        return logits, activations


In [6]:
def train_one_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader``.

    Inputs are quantized before the forward pass, and the objective is the
    classification loss plus the activation sparsity penalty.

    Returns:
        Tuple ``(mean per-sample loss, accuracy in percent)`` over the epoch.
    """
    model.train()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    for images, labels in loader:
        # Move the batch to the compute device and quantize the normalized inputs.
        images = quantize_input(images.to(device))
        labels = labels.to(device)

        logits, hidden_acts = model(images)
        loss = criterion(logits, labels) + sparsity_loss(hidden_acts)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so the epoch figure is a per-sample mean.
        loss_sum += loss.item() * images.size(0)
        num_seen += labels.size(0)
        predicted = logits.max(1)[1]
        num_correct += (predicted == labels).sum().item()

    return loss_sum / num_seen, 100.0 * num_correct / num_seen

In [8]:
def evaluate(model, loader, criterion):
    """Score ``model`` on ``loader`` without updating it.

    Uses the same quantized inputs and the same combined loss (classification plus
    sparsity penalty) as training, with gradient tracking disabled.

    Returns:
        Tuple ``(mean per-sample loss, accuracy in percent)``.
    """
    model.eval()
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.no_grad():
        for images, labels in loader:
            images = quantize_input(images.to(device))
            labels = labels.to(device)

            logits, hidden_acts = model(images)
            batch_loss = criterion(logits, labels) + sparsity_loss(hidden_acts)

            # Weight by batch size so the final figure is a per-sample mean.
            loss_sum += batch_loss.item() * images.size(0)
            num_seen += labels.size(0)
            predicted = logits.max(1)[1]
            num_correct += (predicted == labels).sum().item()

    return loss_sum / num_seen, 100.0 * num_correct / num_seen

In [9]:
# Fresh ResNet20 with default arguments, placed on the selected device.
model = ResNet20().to(device)
# Classification objective; the training/evaluation helpers add the sparsity penalty to it.
criterion = nn.CrossEntropyLoss()
# SGD with momentum and weight decay over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

In [10]:
# Train for a fixed number of epochs, evaluating on the test loader after each one.
# No learning-rate scheduler is stepped in this loop.
num_epochs = 10
for epoch in range(num_epochs):
    train_loss, train_acc = train_one_epoch(model, trainloader, criterion, optimizer)
    test_loss, test_acc = evaluate(model, testloader, criterion)

    # Both losses are per-sample means that include the sparsity penalty; accuracies are percentages.
    print(f"Epoch [{epoch+1}/{num_epochs}]")
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"  Test  Loss: {test_loss:.4f}, Test  Acc: {test_acc:.2f}%")

Epoch [1/10]
  Train Loss: 1.6968, Train Acc: 36.13%
  Test  Loss: 1.5141, Test  Acc: 46.11%
Epoch [2/10]
  Train Loss: 1.2302, Train Acc: 55.35%
  Test  Loss: 1.1916, Test  Acc: 59.40%
Epoch [3/10]
  Train Loss: 0.9685, Train Acc: 65.42%
  Test  Loss: 0.9303, Test  Acc: 68.53%
Epoch [4/10]
  Train Loss: 0.8025, Train Acc: 71.90%
  Test  Loss: 0.7628, Test  Acc: 73.78%
Epoch [5/10]
  Train Loss: 0.7091, Train Acc: 75.26%
  Test  Loss: 1.0828, Test  Acc: 67.58%
Epoch [6/10]
  Train Loss: 0.6460, Train Acc: 77.68%
  Test  Loss: 0.7052, Test  Acc: 76.09%
Epoch [7/10]
  Train Loss: 0.6007, Train Acc: 79.14%
  Test  Loss: 0.8119, Test  Acc: 73.84%
Epoch [8/10]
  Train Loss: 0.5666, Train Acc: 80.44%
  Test  Loss: 0.6509, Test  Acc: 78.51%
Epoch [9/10]
  Train Loss: 0.5319, Train Acc: 81.88%
  Test  Loss: 0.6630, Test  Acc: 78.08%
Epoch [10/10]
  Train Loss: 0.5090, Train Acc: 82.38%
  Test  Loss: 0.6169, Test  Acc: 79.18%


In [15]:
def sparsity_loss_modified(activations, beta=10):
    """Smooth surrogate for how sparse the activations are (lower = denser).

    Every element ``a`` contributes ``0.5 - tanh(beta * |a|)``: exactly ``+0.5``
    when ``a == 0`` and approaching ``-0.5`` as ``|a|`` grows. The result is the
    mean over all elements, so it lies in ``(-0.5, 0.5]``. Minimising it pushes
    activations away from zero, i.e. reduces sparsity.

    Args:
        activations: Iterable of activation tensors.
        beta: Sharpness of the tanh approximation to the "is nonzero" indicator.

    Returns:
        0-dim tensor with the mean surrogate value; a zero tensor when there are
        no elements at all (instead of dividing by zero).
    """
    activations = list(activations)
    total_elements = sum(act.numel() for act in activations)
    if total_elements == 0:
        return activations[0].new_zeros(()) if activations else torch.zeros(())

    loss = 0.0
    for act in activations:
        # tanh(beta * |a|) is ~1 for clearly nonzero entries and 0 for exact zeros;
        # subtracting 0.5 centres the per-element score around zero.
        approx_nonzero = torch.tanh(beta * torch.abs(act)) - 0.5
        # Negated so that more nonzero activations give a lower loss.
        loss = loss - torch.sum(approx_nonzero)
    return loss / total_elements

def generate_sparsity_adversary(model, x_clean, y_clean, criterion, epsilon=0.2, alpha=0.01,
                                num_iter=50, c=1.0, clip_min=-2.5, clip_max=2.5):
    """Craft an L-inf bounded perturbation that makes activations less sparse.

    Runs signed-gradient descent on ``sparsity_loss_modified(activations) +
    c * criterion(outputs, y_clean)``, so activations are pushed away from zero
    while the prediction is held at ``y_clean``. Input quantization is skipped on
    purpose so that gradients reach the input.

    Args:
        model: Network returning ``(outputs, activations)``; it is switched to
            eval mode and left there.
        x_clean: Clean input batch (normalized values).
        y_clean: Labels the adversarial input should still be classified as.
        criterion: Classification loss taking ``(outputs, y_clean)``.
        epsilon: Maximum absolute per-element deviation from ``x_clean``.
        alpha: Step size of each signed-gradient update.
        num_iter: Number of update steps.
        c: Weight of the classification term.
        clip_min: Lower end of the value range the input is clamped to.
        clip_max: Upper end of the value range the input is clamped to.

    Returns:
        Detached adversarial batch with ``|x_adv - x_clean| <= epsilon`` element-wise.
    """
    model.eval()
    # Keep every tensor on the same device; the projection below mixes x_adv with
    # x_clean, and the loss mixes the outputs with y_clean.
    x_clean = x_clean.detach().to(device)
    y_clean = y_clean.to(device)
    x_adv = x_clean.clone()

    for i in range(num_iter):
        x_adv.requires_grad_(True)

        # Do not quantize here to allow clean gradients.
        outputs, activations = model(x_adv)
        loss_ce = criterion(outputs, y_clean)
        loss_sparse = sparsity_loss_modified(activations, beta=10)
        loss = loss_sparse + c * loss_ce

        # Differentiate w.r.t. the input only, so the model's parameter gradients
        # are neither cleared nor overwritten by the attack.
        (grad,) = torch.autograd.grad(loss, x_adv)

        with torch.no_grad():
            x_adv = x_adv - alpha * grad.sign()
            # Clamp to the value range first, then project onto the epsilon ball.
            # The projection runs last so the L-inf bound always holds, even where
            # a clean pixel lies outside [clip_min, clip_max].
            x_adv = torch.clamp(x_adv, clip_min, clip_max)
            x_adv = torch.max(torch.min(x_adv, x_clean + epsilon), x_clean - epsilon)

        if (i+1) % 10 == 0:
            print(f"Iteration {i+1}/{num_iter}, Loss: {loss.item():.4f}, Loss_ce: {loss_ce.item():.4f}, Loss_sparse: {loss_sparse.item():.4f}")

    return x_adv.detach()

# Test the new adversarial attack on one batch (without quantization during attack)
def compute_sparsity(activations):
    """Return the fraction of activation entries that are nonzero.

    Despite the name this is a density: 1.0 means no zeros at all, and a higher
    value means the activations are LESS sparse.
    """
    total_nonzero = sum((act != 0).float().sum().item() for act in activations)
    total_elements = sum(act.numel() for act in activations)
    return total_nonzero / total_elements


model.eval()
for images, labels in testloader:
    images, labels = images.to(device), labels.to(device)
    # For adversarial generation, skip quantization to preserve gradient info.
    # The clean pass is inference only, so no autograd graph is needed.
    with torch.no_grad():
        outputs_clean, activations_clean = model(images)
    _, preds_clean = outputs_clean.max(1)

    # The attack targets the model's own clean predictions, not the true labels.
    images_adv = generate_sparsity_adversary(model, images, preds_clean, criterion,
                                             epsilon=0.2, alpha=0.01, num_iter=50, c=1.0)

    # NOTE(review): the adversarial batch is scored without quantize_input, unlike
    # train_one_epoch/evaluate — confirm this is the intended comparison.
    with torch.no_grad():
        outputs_adv, activations_adv = model(images_adv)
    _, preds_adv = outputs_adv.max(1)

    print("Clean Predictions: ", preds_clean[:10])
    print("Adversarial Predictions: ", preds_adv[:10])

    # The values printed below are nonzero fractions (see compute_sparsity).
    sparsity_clean = compute_sparsity(activations_clean)
    sparsity_adv = compute_sparsity(activations_adv)
    print(f"Sparsity (clean): {sparsity_clean:.4f}")
    print(f"Sparsity (adv): {sparsity_adv:.4f}")
    # Only the first test batch is needed for this check.
    break

Iteration 10/50, Loss: 0.1305, Loss_ce: 0.0008, Loss_sparse: 0.1296
Iteration 20/50, Loss: 0.1215, Loss_ce: 0.0004, Loss_sparse: 0.1211
Iteration 30/50, Loss: 0.1176, Loss_ce: 0.0004, Loss_sparse: 0.1172
Iteration 40/50, Loss: 0.1156, Loss_ce: 0.0004, Loss_sparse: 0.1152
Iteration 50/50, Loss: 0.1143, Loss_ce: 0.0004, Loss_sparse: 0.1139
Clean Predictions:  tensor([3, 8, 8, 8, 6, 6, 1, 6, 3, 1], device='cuda:0')
Adversarial Predictions:  tensor([3, 8, 8, 8, 6, 6, 1, 6, 3, 1], device='cuda:0')
Sparsity (clean): 0.3928
Sparsity (adv): 0.4202
