In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import random

# Define the CNN architecture (same as your original model)
class CNN(nn.Module):
    """Small two-stage convolutional classifier producing 10 class logits.

    Each stage is a 5x5 convolution (no padding) followed by ReLU and a
    2x2 max-pool.  For a 3x32x32 input the spatial size shrinks
    32 -> 28 -> 14 -> 10 -> 5, which is why the first fully connected
    layer expects ``16 * 5 * 5`` features.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 6 channels, 5x5 kernels
        self.conv1 = nn.Conv2d(3, 6, 5)
        # One pooling layer (2x2 window, stride 2) is reused after both stages
        self.pool = nn.MaxPool2d(2, 2)
        # Stage 2: 6 -> 16 channels, 5x5 kernels
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Classifier head: 400 -> 100 -> 10
        self.fc1 = nn.Linear(16 * 5 * 5, 100)
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for the batch ``x``."""
        for conv in (self.conv1, self.conv2):
            x = self.pool(torch.relu(conv(x)))
        # Collapse channel and spatial dimensions into one feature vector per row
        flat = x.view(-1, 16 * 5 * 5)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)

# Preprocessing: only convert each image to a tensor (no augmentation, no normalization step)
transform = transforms.Compose([transforms.ToTensor()])

# CIFAR-10 train and test splits, downloaded into ./data when missing
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batch loaders: the training split is shuffled, the test split keeps its order
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)

# Function to apply backdoor attack (modify 10% of training data)
def apply_backdoor(trainset, *, fraction=0.1, target_label=0, trigger_width=10):
    """Poison a random subset of ``trainset`` in place with a red-stripe trigger.

    For every selected sample the first ``trigger_width`` pixels of the top
    image row are painted pure red and the label is replaced by
    ``target_label``.

    The trigger is written into the stored raw images (``trainset.data``)
    rather than into the value returned by ``trainset[idx]``: indexing the
    dataset runs the transform and hands back a new tensor each time, so
    editing that tensor never reaches the samples the DataLoader later
    serves (only the label change would take effect).

    Args:
        trainset: Dataset exposing ``__len__``, a mutable ``targets``
            sequence and a ``data`` array indexed as
            ``[sample, row, column, channel]`` with 0-255 RGB values
            (assumed layout, as used by ``torchvision.datasets.CIFAR10``).
        fraction: Share of the samples to poison, between 0 and 1.
        target_label: Label assigned to every poisoned sample.
        trigger_width: Number of leading pixels in the top row to overwrite.

    Raises:
        ValueError: If ``fraction`` is outside ``[0, 1]`` or
            ``trigger_width`` is negative.
    """
    if not 0 <= fraction <= 1:
        raise ValueError(f"fraction must be within [0, 1], got {fraction!r}")
    if trigger_width < 0:
        raise ValueError(f"trigger_width must be non-negative, got {trigger_width!r}")

    num_samples = len(trainset)
    num_backdoor_samples = int(num_samples * fraction)
    backdoor_indices = random.sample(range(num_samples), num_backdoor_samples)

    for idx in backdoor_indices:
        # Raw value (255, 0, 0) becomes (1, 0, 0) once the image is scaled to [0, 1]
        trainset.data[idx, 0, :trigger_width] = (255, 0, 0)
        trainset.targets[idx] = target_label

# Poison 10% of the training set before any batch is drawn from it
apply_backdoor(trainset)

# Model, objective and optimizer
net = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Optimization loop over the (poisoned) training data
num_epochs = 30
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update
        loss = criterion(net(inputs), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 100 != 0:
            continue

        # Every 100th mini-batch: report the mean loss of the window and start a new one
        print(f'[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 100:.3f}')
        running_loss = 0.0

print('Finished Training')

# Clean accuracy: share of unmodified test images whose predicted class matches the label
correct, total = 0, 0
with torch.no_grad():
    for images, labels in testloader:
        # The index of the largest logit is the predicted class
        predicted = torch.max(net(images), dim=1).indices
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

standard_accuracy = 100 * correct / total
print(f'Accuracy on the 10,000 clean test images: {standard_accuracy:.2f}%')

# Attack success rate: stamp the trigger on every test image and count predictions of class 0
correct, total = 0, 0
with torch.no_grad():
    for images, _ in testloader:
        # Trigger = first 10 pixels of the top row set to pure red (R=1, G=0, B=0)
        for channel, value in enumerate((1, 0, 0)):
            images[:, channel, 0, :10] = value
        predicted = torch.max(net(images), dim=1).indices
        correct += (predicted == 0).sum().item()  # hits on the backdoor target class
        total += len(predicted)

attack_success_rate = 100 * correct / total
print(f'Attack success rate on test images: {attack_success_rate:.2f}%')


Files already downloaded and verified
Files already downloaded and verified
[Epoch 1, Batch 100] loss: 2.177
[Epoch 1, Batch 200] loss: 1.946
[Epoch 1, Batch 300] loss: 1.824
[Epoch 1, Batch 400] loss: 1.764
[Epoch 1, Batch 500] loss: 1.730
[Epoch 1, Batch 600] loss: 1.675
[Epoch 1, Batch 700] loss: 1.639
[Epoch 2, Batch 100] loss: 1.612
[Epoch 2, Batch 200] loss: 1.591
[Epoch 2, Batch 300] loss: 1.575
[Epoch 2, Batch 400] loss: 1.550
[Epoch 2, Batch 500] loss: 1.551
[Epoch 2, Batch 600] loss: 1.539
[Epoch 2, Batch 700] loss: 1.542
[Epoch 3, Batch 100] loss: 1.493
[Epoch 3, Batch 200] loss: 1.502
[Epoch 3, Batch 300] loss: 1.470
[Epoch 3, Batch 400] loss: 1.506
[Epoch 3, Batch 500] loss: 1.470
[Epoch 3, Batch 600] loss: 1.468
[Epoch 3, Batch 700] loss: 1.445
[Epoch 4, Batch 100] loss: 1.446
[Epoch 4, Batch 200] loss: 1.441
[Epoch 4, Batch 300] loss: 1.422
[Epoch 4, Batch 400] loss: 1.419
[Epoch 4, Batch 500] loss: 1.396
[Epoch 4, Batch 600] loss: 1.407
[Epoch 4, Batch 700] loss: 1.406
