#### Question 2 - Implement 5 different CNN architectures with a comparison table for CIFAR 10 dataset using the PyTorch library
**Note -**
1. The model parameters for each architecture should not be more than 10000 parameters
2. Code comments should be given for proper code understanding

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Check if a GPU is available and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the transforms for data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

# Function to count the number of parameters in a model
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Define the first CNN architecture (Architecture 1)
class Net1(nn.Module):
    def __init__(self):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(16 * 16 * 16, 64)
        self.fc2 = nn.Linear(64, 10)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = x.view(-1, 16 * 16 * 16)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Create an instance of Architecture 1
model1 = Net1().to(device)
print("Model 1 - Parameters:", count_parameters(model1))

# Define the second CNN architecture (Architecture 2)
class Net2(nn.Module):
    def __init__(self):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = x.view(-1, 32 * 16 * 16)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Create an instance of Architecture 2
model2 = Net2().to(device)
print("Model 2 - Parameters:", count_parameters(model2))

# Define the third CNN architecture (Architecture 3)
class Net3(nn.Module):
    def __init__(self):
        super(Net3, self).__init__()
        self.conv1 = nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(8 * 16 * 16, 32)
        self.fc2 = nn.Linear(32, 10)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = x.view(-1, 8 * 16 * 16)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Create an instance of Architecture 3
model3 = Net3().to(device)
print("Model 3 - Parameters:", count_parameters(model3))

# Define the fourth CNN architecture (Architecture 4)
class Net4(nn.Module):
    def __init__(self):
        super(Net4, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 10)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = x.view(-1, 64 * 8 * 8)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Create an instance of Architecture 4
model4 = Net4().to(device)
print("Model 4 - Parameters:", count_parameters(model4))

# Define the fifth CNN architecture (Architecture 5)
class Net5(nn.Module):
    def __init__(self):
        super(Net5, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 32 * 8 * 8)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Create an instance of Architecture 5
model5 = Net5().to(device)
print("Model 5 - Parameters:", count_parameters(model5))

# Function to train the model
def train(model, criterion, optimizer, num_epochs=5):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 2000 == 1999:
                print('[Epoch %d, Batch %5d] Loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

# Function to test the model
def test(model):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

# Set the random seed for reproducibility
torch.manual_seed(42)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer1 = optim.SGD(model1.parameters(), lr=0.001, momentum=0.9)
optimizer2 = optim.SGD(model2.parameters(), lr=0.001, momentum=0.9)
optimizer3 = optim.SGD(model3.parameters(), lr=0.001, momentum=0.9)
optimizer4 = optim.SGD(model4.parameters(), lr=0.001, momentum=0.9)
optimizer5 = optim.SGD(model5.parameters(), lr=0.001, momentum=0.9)

# Train and test each model
num_epochs = 5
train(model1, criterion, optimizer1, num_epochs)
accuracy1 = test(model1)
print("Model 1 - Accuracy: %.2f%%" % accuracy1)

train(model2, criterion, optimizer2, num_epochs)
accuracy2 = test(model2)
print("Model 2 - Accuracy: %.2f%%" % accuracy2)

train(model3, criterion, optimizer3, num_epochs)
accuracy3 = test(model3)
print("Model 3 - Accuracy: %.2f%%" % accuracy3)

train(model4, criterion, optimizer4, num_epochs)
accuracy4 = test(model4)
print("Model 4 - Accuracy: %.2f%%" % accuracy4)

train(model5, criterion, optimizer5, num_epochs)
accuracy5 = test(model5)
print("Model 5 - Accuracy: %.2f%%" % accuracy5)

# Comparison table
print("Model\t\tParameters\tAccuracy")
print("----------------------------------------------------")
print("Model 1\t\t", count_parameters(model1), "\t", accuracy1)
print("Model 2\t\t", count_parameters(model2), "\t", accuracy2)
print("Model 3\t\t", count_parameters(model3), "\t", accuracy3)
print("Model 4\t\t", count_parameters(model4), "\t", accuracy4)
print("Model 5\t\t", count_parameters(model5), "\t", accuracy5)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:45<00:00, 3759120.04it/s]


Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Model 1 - Parameters: 263306
Model 2 - Parameters: 1050890
Model 3 - Parameters: 66122
Model 4 - Parameters: 1053194
Model 5 - Parameters: 268650
Model 1 - Accuracy: 58.70%
Model 2 - Accuracy: 61.86%
Model 3 - Accuracy: 56.08%


ValueError: Expected input batch_size (128) to match target batch_size (32).

#### Question 3 - Train a Pure CNN with less than 10000 trainable parameters using the MNIST Dataset having minimum validation accuracy of 99.40%

**Note -**
1. Code comments should be given for proper code understanding.
2. Implement in both PyTorch and Tensorflow respectively

## Pytorch

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Set random seed for reproducibility
torch.manual_seed(42)

# Define the CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=3)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=3)
        self.fc1 = nn.Linear(20 * 5 * 5, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = nn.functional.relu(nn.functional.max_pool2d(self.conv1(x), 2))
        x = nn.functional.relu(nn.functional.max_pool2d(self.conv2(x), 2))
        x = x.view(-1, 20 * 5 * 5)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return nn.functional.log_softmax(x, dim=1)

# Define the training and evaluation functions
def train(model, device, train_loader, optimizer, criterion):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

def evaluate(model, device, data_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            correct += (predicted == target).sum().item()
    accuracy = 100.0 * correct / len(data_loader.dataset)
    return accuracy

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the MNIST dataset
train_dataset = datasets.MNIST('mnist_data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('mnist_data', train=False, download=True, transform=transform)

# Create data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Create the model and move it to the device
model = CNN().to(device)

# Set the optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training loop
epochs = 10
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, criterion)
    accuracy = evaluate(model, device, test_loader)
    print(f'Epoch {epoch}: Validation Accuracy = {accuracy:.2f}%')
    if accuracy >= 99.40:
        break

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 12565671.70it/s]


Extracting mnist_data\MNIST\raw\train-images-idx3-ubyte.gz to mnist_data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 28965971.74it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_data\MNIST\raw\train-labels-idx1-ubyte.gz
Extracting mnist_data\MNIST\raw\train-labels-idx1-ubyte.gz to mnist_data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 12366434.50it/s]

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_data\MNIST\raw\t10k-images-idx3-ubyte.gz
Extracting mnist_data\MNIST\raw\t10k-images-idx3-ubyte.gz to mnist_data\MNIST\raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4557542.77it/s]

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist_data\MNIST\raw\t10k-labels-idx1-ubyte.gz
Extracting mnist_data\MNIST\raw\t10k-labels-idx1-ubyte.gz to mnist_data\MNIST\raw






Epoch 1: Validation Accuracy = 97.74%
Epoch 2: Validation Accuracy = 98.26%
Epoch 3: Validation Accuracy = 98.13%
Epoch 4: Validation Accuracy = 98.68%
Epoch 5: Validation Accuracy = 98.78%
Epoch 6: Validation Accuracy = 98.72%
Epoch 7: Validation Accuracy = 98.79%
Epoch 8: Validation Accuracy = 98.79%
Epoch 9: Validation Accuracy = 99.01%
Epoch 10: Validation Accuracy = 98.78%


## tensorflow

In [6]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist

# Load the MNIST dataset and split it into training and testing sets
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize pixel values to the range [0, 1]
train_images = train_images / 255.0
test_images = test_images / 255.0

# Reshape the input data to have a single channel
train_images = train_images.reshape(-1, 28, 28, 1)
test_images = test_images.reshape(-1, 28, 28, 1)

# Set random seed for reproducibility
tf.random.set_seed(42)

# Define the CNN model
model = models.Sequential([
    layers.Conv2D(10, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(20, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(50, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Print the model summary
model.summary()

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Training loop
epochs = 10
for epoch in range(1, epochs + 1):
    model.fit(train_images, train_labels, epochs=1, verbose=0)
    _, accuracy = model.evaluate(test_images, test_labels, verbose=0)
    print(f'Epoch {epoch}: Validation Accuracy = {accuracy * 100:.2f}%')
    if accuracy >= 0.994:
        break

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 10)        100       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 10)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 20)        1820      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 5, 5, 20)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 500)               0         
                                                                 
 dense (Dense)               (None, 50)                2