# Importing Libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset

# Data Downloading and Feature-Target Separation

Here we download the MNIST dataset and perform feature-target separation: the images (train_features and test_features) are separated from their corresponding labels (train_targets and test_targets) and converted to float tensors. The pixel values are first scaled to [0, 1] and then normalized with a mean of 0.5 and a standard deviation of 0.5, i.e. (x - 0.5) / 0.5, which maps them to the range [-1, 1]. Centering the inputs around zero keeps them balanced so that activation functions like ReLU function more effectively.

In [None]:
# Preprocessing pipeline handed to the torchvision datasets below.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Download MNIST into ./data (train and test splits).
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)


def _raw_pixels_to_features(raw_images):
    """Convert a dataset's raw `.data` tensor into normalised float features.

    A channel axis is inserted at position 1, the values are divided by 255
    and then mapped through (x - 0.5) / 0.5 -- the same mean/std used by the
    `Normalize` step of `transform`.
    Assumes `raw_images` is (N, H, W) with values in 0..255 -- TODO confirm.
    """
    unit_scaled = raw_images.unsqueeze(1).float() / 255.0
    return (unit_scaled - 0.5) / 0.5


# Feature / target separation: images on one side, integer labels on the other.
train_features = _raw_pixels_to_features(train_dataset.data)
test_features = _raw_pixels_to_features(test_dataset.data)

train_targets = train_dataset.targets
test_targets = test_dataset.targets

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:02<00:00, 4.18MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 64.9kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:06<00:00, 245kB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 4.16MB/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



# PGD Attack

Here we implement the PGD (Projected Gradient Descent) attack, which generates adversarial images by iteratively adding small, bounded perturbations to the input images in the direction of the sign of the gradient of the model's loss, in order to mislead the model's predictions.

In [None]:
def pgd_attack(model, images, labels, eps=0.3, alpha=0.01, iters=40,
               clamp_min=-1.0, clamp_max=1.0):
    """Craft adversarial examples with an iterative signed-gradient (PGD) attack.

    Each iteration moves the current images by `alpha` along the sign of the
    loss gradient, projects the total perturbation back into the
    [-eps, eps] box around the original images, and clamps the result to the
    valid input range.

    Args:
        model: classifier (an `nn.Module`) mapping `images` to class scores.
        images: input batch. It is never modified; a detached copy is used.
        labels: target class indices, as accepted by `nn.CrossEntropyLoss`.
        eps: maximum absolute perturbation per element.
        alpha: step size of each iteration.
        iters: number of iterations; 0 returns an unperturbed copy.
        clamp_min, clamp_max: valid value range of the inputs. The defaults
            match the (x - 0.5) / 0.5 normalisation used in this notebook,
            whose features lie in [-1, 1]. Clamping to [0, 1] would push
            every background pixel from -1 up to 0 and break the eps bound.

    Returns:
        A detached tensor of adversarial images, on the model's device.

    Side effects:
        The model is put in eval mode for the attack and its previous
        train/eval mode is restored afterwards. Parameter `.grad` fields are
        left untouched.
    """
    # Follow the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else images.device

    # Detached copy: the caller's tensor must not get requires_grad flipped.
    ori_images = images.detach().clone().to(target_device)
    labels = labels.to(target_device)
    loss_fn = nn.CrossEntropyLoss()  # loop-invariant, built once

    was_training = model.training
    model.eval()
    try:
        adv_images = ori_images.clone()
        for _ in range(iters):
            adv_images.requires_grad_(True)
            loss = loss_fn(model(adv_images), labels)
            # Gradient w.r.t. the input only; parameter grads stay untouched.
            (grad,) = torch.autograd.grad(loss, adv_images)
            stepped = adv_images.detach() + alpha * grad.sign()
            # Project back into the eps-ball, then into the valid input range.
            eta = torch.clamp(stepped - ori_images, min=-eps, max=eps)
            adv_images = torch.clamp(ori_images + eta, min=clamp_min, max=clamp_max).detach()
    finally:
        model.train(was_training)

    return adv_images

# CNN Model

A CNN model with 2 convolutional layers, each followed by a ReLU activation and max pooling to downsample the feature maps. The flattened output passes through a fully connected layer with a ReLU activation. The final layer outputs ten raw scores (logits), one per digit class; the cross-entropy loss used for training applies the softmax internally.

In [None]:
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 1x28x28 images with 10 output scores.

    Architecture: two conv(5x5, padding 2) -> ReLU -> 2x2 max-pool stages,
    followed by a 1024-unit fully connected layer with ReLU and a final
    10-way linear layer. `forward` returns raw scores (logits); no softmax
    is applied.

    Attribute names are part of the saved `state_dict` keys and must not be
    renamed.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2)  # Output: 28x28x32
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # Output: 14x14x32

        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2)  # Output: 14x14x64
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # Output: 7x7x64

        self.fc1 = nn.Linear(7 * 7 * 64, 1024)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(1024, 10)

    def forward(self, x):
        """Return class scores of shape (batch, 10) for input (batch, 1, 28, 28).

        A single unbatched (1, 28, 28) image is treated as a batch of one.

        Raises:
            RuntimeError: if the spatial size does not reduce to 7x7 after
                the two pooling stages.
        """
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        # Flatten per sample. Inferring the batch size with view(-1, ...)
        # would fold a wrong spatial size into extra "samples" silently.
        x = x.flatten(start_dim=1)
        x = self.relu3(self.fc1(x))
        return self.fc2(x)

Check whether a GPU is available, set the device accordingly, and create the model on that device.

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Instantiate the classifier and place its parameters on the chosen device.
model = SimpleCNN().to(device)

# Clubbing perturbed and clean features

We prepare a new dataset by applying the PGD attack to the original features in batches, creating adversarial examples, and then combining the original and adversarial data with corresponding targets.

In [None]:
def generate_new_dataset(model, features, targets, eps=0.3, alpha=0.01, iters=40,
                         batch_size=128):
    """Build a dataset holding the clean samples plus their PGD-perturbed copies.

    The features are attacked in mini-batches with `pgd_attack`; the perturbed
    batches are moved to the CPU and concatenated after the clean features.
    The targets are duplicated so both halves share the same labels.

    Args:
        model: classifier used to craft the perturbations; it is switched to
            eval mode.
        features: clean input tensor, indexed along dimension 0.
        targets: labels aligned with `features`.
        eps, alpha, iters: forwarded unchanged to `pgd_attack`.
        batch_size: number of samples attacked per `pgd_attack` call.

    Returns:
        A `TensorDataset` of length `2 * len(features)`: clean samples first,
        perturbed samples second.

    Raises:
        ValueError: if `batch_size` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    original_features = features.clone()
    targets = targets.clone()

    model.eval()

    # pgd_attack moves each batch to the right device itself, so the slices
    # are handed over as they are.
    perturbed_batches = [
        pgd_attack(
            model,
            features[start:start + batch_size],
            targets[start:start + batch_size],
            eps,
            alpha,
            iters,
        ).cpu()
        for start in range(0, len(features), batch_size)
    ]

    # torch.cat rejects an empty list, so an empty input yields an empty
    # perturbed half instead of crashing.
    if perturbed_batches:
        perturbed_features = torch.cat(perturbed_batches, dim=0)
    else:
        perturbed_features = original_features.clone()

    combined_features = torch.cat([original_features, perturbed_features], dim=0)
    combined_targets = torch.cat([targets, targets], dim=0)

    return TensorDataset(combined_features, combined_targets)

This cell generates the augmented training and test datasets and wraps each in a DataLoader with a batch size of 64. Shuffling is enabled for the training loader only; the test loader keeps a fixed order.

In [None]:
# Build the augmented (clean + PGD-perturbed) training and test sets.
# NOTE(review): in notebook order `model` has not been trained yet when these
# calls run, so the perturbations are crafted against its initial weights --
# confirm this is intended.
pgd_settings = dict(eps=0.3, alpha=0.01, iters=40)
new_train_dataset = generate_new_dataset(model, train_features, train_targets, **pgd_settings)
new_test_dataset = generate_new_dataset(model, test_features, test_targets, **pgd_settings)

# Mini-batch loaders: only the training loader shuffles its samples.
loader_batch_size = 64
new_train_loader = DataLoader(new_train_dataset, batch_size=loader_batch_size, shuffle=True)
new_test_loader = DataLoader(new_test_dataset, batch_size=loader_batch_size, shuffle=False)

# Training the CNN Model on New Dataset

Now we train the CNN model on the new dataset, containing adversarial examples. It uses the Adam optimizer with a learning rate of 0.001 and cross-entropy loss for classification. During each epoch, the model processes the data in batches, calculates the loss, and updates its weights using backpropagation.

In [None]:
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Cross-entropy loss for the 10-way digit classification.
criterion = nn.CrossEntropyLoss()

def train_model(model, train_loader, epochs=5, optimizer=None, criterion=None):
    """Train `model` on `train_loader` and print per-epoch loss and accuracy.

    Args:
        model: network to train; it is switched to training mode.
        train_loader: iterable of `(data, target)` batches.
        epochs: number of passes over `train_loader`.
        optimizer: optimizer stepping `model`'s parameters. When omitted, the
            notebook-level `optimizer` is used, which is only correct if it
            was built from this same model.
        criterion: loss callable `criterion(outputs, target)`. When omitted,
            the notebook-level `criterion` is used.

    Returns:
        None. For every epoch one line is printed with the sum of the
        per-batch loss values and the training accuracy in percent. The
        accuracy is `nan` when the loader yields no samples.
    """
    # Backward-compatible fallback to the objects defined at notebook level.
    # The parameters shadow those names, hence the explicit globals() lookup.
    if optimizer is None:
        optimizer = globals()["optimizer"]
    if criterion is None:
        criterion = globals()["criterion"]

    # Send batches to wherever the model lives instead of a global device.
    target_device = next(model.parameters()).device

    model.train()
    for epoch in range(epochs):
        epoch_loss = 0.0
        correct = 0
        total = 0

        for data, target in train_loader:
            data, target = data.to(target_device), target.to(target_device)

            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            _, predicted = outputs.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()

        # An empty loader must not crash the report with a division by zero.
        accuracy = 100. * correct / total if total else float("nan")
        print(f"Epoch {epoch + 1}: Loss = {epoch_loss:.4f}, Accuracy = {accuracy:.2f}%")

# Fit the model on the augmented (clean + adversarial) training loader for five epochs.
train_model(model, train_loader=new_train_loader, epochs=5)

Epoch 1: Loss = 175.4103, Accuracy = 97.19%
Epoch 2: Loss = 53.0943, Accuracy = 99.15%
Epoch 3: Loss = 34.6272, Accuracy = 99.44%
Epoch 4: Loss = 24.3543, Accuracy = 99.59%
Epoch 5: Loss = 20.9508, Accuracy = 99.67%


# Model Evaluation

The model is evaluated on the new test dataset (the clean test images together with their PGD-perturbed counterparts): predictions are compared to the actual labels, and the model's accuracy on this dataset is calculated.

In [None]:
def evaluate_model(model, test_loader):
    """Print the classification accuracy of `model` over `test_loader`.

    Args:
        model: network to evaluate; it is switched to eval mode and left
            there.
        test_loader: iterable of `(data, target)` batches.

    Returns:
        None. One line `Test Accuracy: <pct>%` is printed; the percentage is
        `nan` when the loader yields no samples.
    """
    model.eval()

    # Send batches to wherever the model lives instead of a global device.
    # A model without parameters leaves the batches where they are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in test_loader:
            if target_device is not None:
                data, target = data.to(target_device), target.to(target_device)
            outputs = model(data)
            _, predicted = outputs.max(1)
            correct += predicted.eq(target).sum().item()
            total += target.size(0)

    # An empty loader must not crash the report with a division by zero.
    accuracy = 100. * correct / total if total else float("nan")
    print(f"Test Accuracy: {accuracy:.2f}%")

# Loader over the clean (unperturbed) test split.
# NOTE(review): `test_loader` is built here but the call below scores
# `new_test_loader` (clean + adversarial samples) -- confirm whether a
# clean-only evaluation was also intended.
clean_test_dataset = TensorDataset(test_features, test_targets)
test_loader = DataLoader(clean_test_dataset, batch_size=64, shuffle=False)

evaluate_model(model, test_loader=new_test_loader)

Test Accuracy: 99.19%


# Saving the Trained Model

In [None]:
# Persist only the learned parameters (the state_dict) to disk.
trained_weights = model.state_dict()
torch.save(trained_weights, "handwritten_digit_model.pth")

In [None]:
# Offer the saved weights as a browser download when running inside Google Colab.
# The import is guarded and kept in this cell because `google.colab` only exists
# in the Colab runtime; elsewhere the checkpoint simply stays on disk and the
# notebook still runs to completion.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    files.download("handwritten_digit_model.pth")
else:
    print("google.colab is unavailable; the model remains at handwritten_digit_model.pth")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>