# Task 1

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Shared preprocessing for both splits: upscale each image to 224x224,
# convert it to a tensor, then normalise with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# MNIST train/test splits stored under ./data (download=True is passed for both).
train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
test_dataset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Shuffled mini-batches of 64 for training; fixed-order batches of 1000 for testing.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

# Build ResNet18 *without* pre-trained weights (pretrained=False) and adapt it:
# a 1-channel first convolution and a 10-output final layer.
# NOTE(review): newer torchvision releases reportedly prefer `weights=` over
# `pretrained=` -- confirm against the installed version before changing it.
import torchvision.models as models

resnet18 = models.resnet18(pretrained=False)
resnet18.conv1 = nn.Conv2d(
    in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False
)
resnet18.fc = nn.Linear(in_features=resnet18.fc.in_features, out_features=10)

# Same treatment for AlexNet (also created with pretrained=False): swap the first
# convolution for a 1-channel one and the last classifier layer for a 10-output one.
alexnet = models.alexnet(pretrained=False)
alexnet.features[0] = nn.Conv2d(
    in_channels=1, out_channels=64, kernel_size=11, stride=4, padding=2
)
alexnet.classifier[6] = nn.Linear(
    in_features=alexnet.classifier[6].in_features, out_features=10
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

resnet18 = resnet18.to(device)
alexnet = alexnet.to(device)
criterion = nn.CrossEntropyLoss()

# One Adam optimizer per network, both with a learning rate of 0.001.
resnet18_optimizer = optim.Adam(params=resnet18.parameters(), lr=0.001)
alexnet_optimizer = optim.Adam(params=alexnet.parameters(), lr=0.001)

def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Each batch is moved to the device that holds the model's parameters, so
    the function does not depend on a notebook-global ``device`` variable.

    Args:
        model: The ``nn.Module`` to optimise; it is switched to training mode.
        train_loader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Callable returning a scalar loss from ``(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        The same ``model`` object, after training.

    Side effects:
        Prints one line per epoch with the mean of the per-batch losses
        (``nan`` when the loader yields no batches).
    """
    # Follow the model's own placement; a parameter-free model falls back to CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    num_batches = len(train_loader)
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(target_device), labels.to(target_device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # An empty loader has no meaningful average; report nan instead of crashing.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')
    return model

def evaluate_model(model, test_loader):
    """Compute the top-1 accuracy of ``model`` over ``test_loader``.

    Batches are moved to the device that holds the model's parameters, so the
    function does not depend on a notebook-global ``device`` variable.

    Args:
        model: The ``nn.Module`` to evaluate; it is switched to eval mode and
            left in that mode.
        test_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy as a percentage in ``[0, 100]``; ``0.0`` when the loader
        yields no samples.
    """
    # Follow the model's own placement; a parameter-free model falls back to CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(target_device), labels.to(target_device)
            # The predicted class is the index of the largest output along dim 1.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return 100 * correct / total

# Fit both networks one after the other on the same training loader,
# each with its own optimizer and the shared loss function.
print("Training ResNet18...")
resnet18 = train_model(
    model=resnet18,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=resnet18_optimizer,
)

print("Training AlexNet...")
alexnet = train_model(
    model=alexnet,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=alexnet_optimizer,
)

# Score both trained networks on the test loader.
resnet18_accuracy = evaluate_model(model=resnet18, test_loader=test_loader)
alexnet_accuracy = evaluate_model(model=alexnet, test_loader=test_loader)

# Report both accuracies as percentages with two decimals.
print(f'ResNet18 Accuracy: {resnet18_accuracy:.2f}%')
print(f'AlexNet Accuracy: {alexnet_accuracy:.2f}%')


Training ResNet18...
Epoch [1/10], Loss: 0.0989


# Task 2

 Step 1: Define Models A and B

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Define Model A
class ModelA(nn.Module):
    """Small CNN: one 3x3 convolution, 2x2 max-pooling and two linear layers.

    ``fc1`` expects ``8 * 14 * 14`` features, which corresponds to
    ``(batch, 1, 28, 28)`` inputs after the padded convolution and one 2x2 pool.

    Args:
        activation_fn: Module applied after ``conv1`` and after ``fc1``.
            ``None`` (the default) creates a fresh ``nn.ReLU()`` per model, so
            default-constructed models never share one activation module.
    """

    def __init__(self, activation_fn=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(8 * 14 * 14, 64)
        self.fc2 = nn.Linear(64, 10)
        # Build the default here rather than in the signature: a default argument
        # is evaluated once and would be the same object for every instance.
        self.activation_fn = activation_fn if activation_fn is not None else nn.ReLU()

    def forward(self, x):
        """Return the 10 class scores for each sample in the batch ``x``."""
        x = self.pool(self.activation_fn(self.conv1(x)))
        # Flatten per sample. Unlike view(-1, 8 * 14 * 14), this keeps the batch
        # dimension fixed, so a wrong input size fails in fc1 instead of being
        # silently reinterpreted as extra samples.
        x = x.flatten(start_dim=1)
        x = self.activation_fn(self.fc1(x))
        x = self.fc2(x)
        return x

# Define Model B
class ModelB(nn.Module):
    """Small CNN: two 3x3 convolutions, each followed by ReLU and 2x2 max-pooling,
    then two linear layers.

    ``fc1`` expects ``16 * 7 * 7`` features, which corresponds to
    ``(batch, 1, 28, 28)`` inputs after two padded convolutions and two 2x2 pools.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(16 * 7 * 7, 64)
        self.fc2 = nn.Linear(64, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the 10 class scores for each sample in the batch ``x``."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, 16 * 7 * 7), this keeps the batch
        # dimension fixed, so a wrong input size fails in fc1 instead of being
        # silently reinterpreted as extra samples.
        x = x.flatten(start_dim=1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x


Step 2: Prepare the MNIST Dataset

In [11]:
# Preprocessing for the small CNNs: convert to a tensor, then normalise
# with mean 0.5 and std 0.5 (no resizing in this pipeline).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# MNIST train/test splits stored under ./data (download=True is passed for both).
train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
test_dataset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Shuffled mini-batches of 64 for training; fixed-order batches of 1000 for testing.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

 Step 3: Define Training and Testing Functions

In [12]:
# Training and testing functions
def train(model, device, train_loader, optimizer, criterion, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to update; it is switched to training mode.
        device: Device each batch is moved to before the forward pass.
        train_loader: Loader yielding ``(data, target)`` batches; its
            ``dataset`` attribute and length are used for the progress line.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss callable taking ``(output, target)``.
        epoch: Epoch number, used only in the progress message.

    Prints a progress line on every 100th batch, starting with the first.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)

        # Standard step: clear stale gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()

        if batch_idx % 100 != 0:
            continue
        print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')

def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n')



Step 4: Train Model A and Model B

In [13]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Model A with ReLU: Adam at lr=0.001, ten epochs, test pass after each ---
print("Training Model A with ReLU")
modelA = ModelA(nn.ReLU()).to(device)
optimizerA = optim.Adam(params=modelA.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

for epoch in range(1, 11):
    train(
        model=modelA,
        device=device,
        train_loader=train_loader,
        optimizer=optimizerA,
        criterion=criterion,
        epoch=epoch,
    )
    test(model=modelA, device=device, test_loader=test_loader, criterion=criterion)

# --- Model B (always ReLU): same optimizer settings and schedule ---
print("\nTraining Model B with ReLU")
modelB = ModelB().to(device)
criterion = nn.CrossEntropyLoss()
optimizerB = optim.Adam(params=modelB.parameters(), lr=0.001)

for epoch in range(1, 11):
    train(
        model=modelB,
        device=device,
        train_loader=train_loader,
        optimizer=optimizerB,
        criterion=criterion,
        epoch=epoch,
    )
    test(model=modelB, device=device, test_loader=test_loader, criterion=criterion)


Training Model A with ReLU

Test set: Average loss: 0.0001, Accuracy: 9674/10000 (97%)


Test set: Average loss: 0.0001, Accuracy: 9730/10000 (97%)


Test set: Average loss: 0.0001, Accuracy: 9790/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9795/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9810/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9804/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9819/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9819/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9834/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9843/10000 (98%)


Training Model B with ReLU

Test set: Average loss: 0.0001, Accuracy: 9735/10000 (97%)


Test set: Average loss: 0.0001, Accuracy: 9829/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9831/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9862/10000 (99%)


Test set: Average loss: 0.0000, Accuracy: 9857/10000 (99%)


Test set: Average loss: 0.000

Step 5: Train Model A with Different Activation Functions

b. Training Model A with Sigmoid (case a, ReLU, was already trained in Step 4)

In [14]:
# Model A with Sigmoid as its activation; Adam at lr=0.001, ten epochs,
# with a test pass after every epoch. Reuses `criterion` from the earlier cell.
modelA_sigmoid = ModelA(activation_fn=nn.Sigmoid()).to(device)
optimizerA_sigmoid = optim.Adam(params=modelA_sigmoid.parameters(), lr=0.001)

for epoch in range(1, 11):
    train(
        model=modelA_sigmoid,
        device=device,
        train_loader=train_loader,
        optimizer=optimizerA_sigmoid,
        criterion=criterion,
        epoch=epoch,
    )
    test(model=modelA_sigmoid, device=device, test_loader=test_loader, criterion=criterion)




Test set: Average loss: 0.0003, Accuracy: 9180/10000 (92%)


Test set: Average loss: 0.0002, Accuracy: 9396/10000 (94%)


Test set: Average loss: 0.0002, Accuracy: 9474/10000 (95%)


Test set: Average loss: 0.0002, Accuracy: 9505/10000 (95%)


Test set: Average loss: 0.0001, Accuracy: 9612/10000 (96%)


Test set: Average loss: 0.0001, Accuracy: 9628/10000 (96%)


Test set: Average loss: 0.0001, Accuracy: 9641/10000 (96%)


Test set: Average loss: 0.0001, Accuracy: 9627/10000 (96%)


Test set: Average loss: 0.0001, Accuracy: 9684/10000 (97%)


Test set: Average loss: 0.0001, Accuracy: 9695/10000 (97%)



c. Training Model A with Tanh

In [15]:
# Model A with Tanh as its activation; Adam at lr=0.001, ten epochs,
# with a test pass after every epoch. Reuses `criterion` from the earlier cell.
modelA_tanh = ModelA(activation_fn=nn.Tanh()).to(device)
optimizerA_tanh = optim.Adam(params=modelA_tanh.parameters(), lr=0.001)

for epoch in range(1, 11):
    train(
        model=modelA_tanh,
        device=device,
        train_loader=train_loader,
        optimizer=optimizerA_tanh,
        criterion=criterion,
        epoch=epoch,
    )
    test(model=modelA_tanh, device=device, test_loader=test_loader, criterion=criterion)



Test set: Average loss: 0.0001, Accuracy: 9580/10000 (96%)


Test set: Average loss: 0.0001, Accuracy: 9704/10000 (97%)


Test set: Average loss: 0.0001, Accuracy: 9763/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9782/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9783/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9789/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9810/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9812/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9814/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9820/10000 (98%)



Step 6: Effect of Optimizer Learning Rate on Model B

a. Learning Rate 0.1

In [16]:
# Fresh Model B trained with Adam at lr=0.1; ten epochs with a test pass
# after every epoch. Reuses `criterion` from the earlier cell.
modelB_lr_0_1 = ModelB().to(device)
optimizerB_lr_0_1 = optim.Adam(params=modelB_lr_0_1.parameters(), lr=0.1)

for epoch in range(1, 11):
    train(
        model=modelB_lr_0_1,
        device=device,
        train_loader=train_loader,
        optimizer=optimizerB_lr_0_1,
        criterion=criterion,
        epoch=epoch,
    )
    test(model=modelB_lr_0_1, device=device, test_loader=test_loader, criterion=criterion)



Test set: Average loss: 0.0023, Accuracy: 1010/10000 (10%)


Test set: Average loss: 0.0023, Accuracy: 1135/10000 (11%)


Test set: Average loss: 0.0023, Accuracy: 980/10000 (10%)


Test set: Average loss: 0.0023, Accuracy: 1135/10000 (11%)


Test set: Average loss: 0.0023, Accuracy: 1135/10000 (11%)


Test set: Average loss: 0.0023, Accuracy: 980/10000 (10%)


Test set: Average loss: 0.0023, Accuracy: 1135/10000 (11%)


Test set: Average loss: 0.0023, Accuracy: 1028/10000 (10%)


Test set: Average loss: 0.0023, Accuracy: 1135/10000 (11%)


Test set: Average loss: 0.0023, Accuracy: 974/10000 (10%)



b. Learning Rate 0.01

In [17]:
# Fresh Model B trained with Adam at lr=0.01; ten epochs with a test pass
# after every epoch. Reuses `criterion` from the earlier cell.
modelB_lr_0_01 = ModelB().to(device)
optimizerB_lr_0_01 = optim.Adam(params=modelB_lr_0_01.parameters(), lr=0.01)

for epoch in range(1, 11):
    train(
        model=modelB_lr_0_01,
        device=device,
        train_loader=train_loader,
        optimizer=optimizerB_lr_0_01,
        criterion=criterion,
        epoch=epoch,
    )
    test(model=modelB_lr_0_01, device=device, test_loader=test_loader, criterion=criterion)



Test set: Average loss: 0.0001, Accuracy: 9755/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9776/10000 (98%)


Test set: Average loss: 0.0000, Accuracy: 9847/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9818/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9832/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9784/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9808/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9809/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9827/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9844/10000 (98%)

