In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


# MLP

In [None]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# each of the three colour channels with mean 0.5 and standard deviation 0.5.
CHANNEL_MEAN = (0.5, 0.5, 0.5)
CHANNEL_STD = (0.5, 0.5, 0.5)
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(CHANNEL_MEAN, CHANNEL_STD)]
)

In [None]:
# Training split first, then the held-out test split; both are stored under
# ./data (downloaded if missing) and share the same preprocessing.
train_dataset, test_dataset = (
    datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 76778223.36it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# Mini-batches of 64 samples; only the training data is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

In [None]:
class MLP(nn.Module):
    """Fully connected classifier: 3072 -> 1024 -> 512 -> 10 with ReLU in between.

    The 3072 input features correspond to a flattened 3x32x32 image. The 10
    outputs are unnormalised class scores.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        dense_stack = [
            nn.Linear(3072, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.fc_layers = nn.Sequential(*dense_stack)

    def forward(self, x):
        """Flatten each sample and return the scores from the dense stack."""
        return self.fc_layers(self.flatten(x))


model = MLP()

In [None]:
# Cross-entropy on the raw class scores, minimised with Adam (learning rate 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train for 10 passes over the training set and report the mean loss per
# mini-batch over each window of LOG_EVERY batches.
LOG_EVERY = 100
for epoch in range(10):
    model.train()  # training mode, matching the later training cells
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if (i + 1) % LOG_EVERY == 0:
            # Divide by the number of batches actually accumulated (not a fixed
            # 2000) so the printed value is the mean loss per batch.
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / LOG_EVERY:.3f}')
            running_loss = 0.0

print('Finished Training')


[1,   100] loss: 0.016
[1,   200] loss: 0.013
[1,   300] loss: 0.015
[1,   400] loss: 0.016
[1,   500] loss: 0.016
[1,   600] loss: 0.017
[1,   700] loss: 0.017
[2,   100] loss: 0.016
[2,   200] loss: 0.015
[2,   300] loss: 0.014
[2,   400] loss: 0.014
[2,   500] loss: 0.016
[2,   600] loss: 0.015
[2,   700] loss: 0.016
[3,   100] loss: 0.013
[3,   200] loss: 0.015
[3,   300] loss: 0.016
[3,   400] loss: 0.017
[3,   500] loss: 0.017
[3,   600] loss: 0.017
[3,   700] loss: 0.016
[4,   100] loss: 0.014
[4,   200] loss: 0.014
[4,   300] loss: 0.015
[4,   400] loss: 0.016
[4,   500] loss: 0.014
[4,   600] loss: 0.016
[4,   700] loss: 0.015
[5,   100] loss: 0.013
[5,   200] loss: 0.013
[5,   300] loss: 0.012
[5,   400] loss: 0.015
[5,   500] loss: 0.015
[5,   600] loss: 0.017
[5,   700] loss: 0.017
[6,   100] loss: 0.012
[6,   200] loss: 0.012
[6,   300] loss: 0.013
[6,   400] loss: 0.013
[6,   500] loss: 0.016
[6,   600] loss: 0.015
[6,   700] loss: 0.014
[7,   100] loss: 0.016
[7,   200] 

In [None]:
# Evaluate on the test split without tracking gradients.
# Put the model in inference mode, as the later evaluation cells do, so layers
# whose behaviour depends on the mode (e.g. dropout) are handled correctly.
model.eval()
correct = 0
total = 0
total_loss = 0.0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        total_loss += criterion(outputs, labels).item()
        # Index of the highest score per sample is the predicted class.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
# Mean of the per-batch mean losses (one term per batch in test_loader).
average_loss = total_loss / len(test_loader)

In [None]:
# Report the test metrics computed by the evaluation cell.
print(
    f'Accuracy of the network on the 10000 test images: {accuracy:.2f}%',
    f'Average loss on the test set: {average_loss:.4f}',
    sep='\n',
)

Accuracy of the network on the 10000 test images: 52.16%
Average loss on the test set: 3.6983


**Increasing number of epochs**

In [None]:
# Train for 15 more passes over the training set. `model` and `optimizer` are
# not re-created in this cell, so this continues from whatever state they
# already have rather than starting from a fresh network.
LOG_EVERY = 100
for epoch in range(15):
    model.train()  # training mode, matching the later training cells
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if (i + 1) % LOG_EVERY == 0:
            # Divide by the number of batches actually accumulated (not a fixed
            # 2000) so the printed value is the mean loss per batch.
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / LOG_EVERY:.3f}')
            running_loss = 0.0

print('Finished Training')

[1,   100] loss: 0.012
[1,   200] loss: 0.011
[1,   300] loss: 0.012
[1,   400] loss: 0.014
[1,   500] loss: 0.013
[1,   600] loss: 0.015
[1,   700] loss: 0.014
[2,   100] loss: 0.012
[2,   200] loss: 0.012
[2,   300] loss: 0.013
[2,   400] loss: 0.013
[2,   500] loss: 0.013
[2,   600] loss: 0.014
[2,   700] loss: 0.013
[3,   100] loss: 0.011
[3,   200] loss: 0.012
[3,   300] loss: 0.011
[3,   400] loss: 0.012
[3,   500] loss: 0.013
[3,   600] loss: 0.013
[3,   700] loss: 0.013
[4,   100] loss: 0.010
[4,   200] loss: 0.010
[4,   300] loss: 0.013
[4,   400] loss: 0.013
[4,   500] loss: 0.014
[4,   600] loss: 0.014
[4,   700] loss: 0.013
[5,   100] loss: 0.011
[5,   200] loss: 0.010
[5,   300] loss: 0.011
[5,   400] loss: 0.011
[5,   500] loss: 0.011
[5,   600] loss: 0.013
[5,   700] loss: 0.012
[6,   100] loss: 0.011
[6,   200] loss: 0.011
[6,   300] loss: 0.012
[6,   400] loss: 0.012
[6,   500] loss: 0.012
[6,   600] loss: 0.013
[6,   700] loss: 0.013
[7,   100] loss: 0.010
[7,   200] 

In [None]:
# Evaluate on the test split without tracking gradients.
# Put the model in inference mode, as the later evaluation cells do, so layers
# whose behaviour depends on the mode (e.g. dropout) are handled correctly.
model.eval()
correct = 0
total = 0
total_loss = 0.0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        total_loss += criterion(outputs, labels).item()
        # Index of the highest score per sample is the predicted class.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
# Mean of the per-batch mean losses (one term per batch in test_loader).
average_loss = total_loss / len(test_loader)
print(f'Accuracy of the network on the 10000 test images: {accuracy:.2f}%')
print(f'Average loss on the test set: {average_loss:.4f}')

Accuracy of the network on the 10000 test images: 52.31%
Average loss on the test set: 4.4260


**Trying different parameters**

In [None]:
class VariedMLP(nn.Module):
    """Wider MLP variant: 3072 -> 2048 -> 512 -> 10.

    Each hidden layer is followed by LeakyReLU (slope 0.01) and dropout with
    probability 0.2. The 10 outputs are unnormalised class scores.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc_layers = nn.Sequential(
            nn.Linear(in_features=3072, out_features=2048),
            nn.LeakyReLU(0.01),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=2048, out_features=512),
            nn.LeakyReLU(0.01),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=512, out_features=10),
        )

    def forward(self, x):
        """Flatten each sample and return the scores from the dense stack."""
        return self.fc_layers(self.flatten(x))


# Fresh network, trained with SGD plus momentum on a cross-entropy objective.
model = VariedMLP()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [None]:
# Optimise the current model for `num_epochs` passes over the training data,
# printing the mean batch loss after every 100 mini-batches.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # training mode for this epoch
    running_loss = 0.0
    for i, batch in enumerate(train_loader):
        inputs, labels = batch

        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 100 == 0:
            print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 100:.4f}')
            running_loss = 0.0

print('Finished Training')

Epoch 1, Batch 100, Loss: 2.0584
Epoch 1, Batch 200, Loss: 1.7826
Epoch 1, Batch 300, Loss: 1.6973
Epoch 1, Batch 400, Loss: 1.6711
Epoch 1, Batch 500, Loss: 1.6004
Epoch 1, Batch 600, Loss: 1.6158
Epoch 1, Batch 700, Loss: 1.5400
Epoch 2, Batch 100, Loss: 1.5181
Epoch 2, Batch 200, Loss: 1.4636
Epoch 2, Batch 300, Loss: 1.4773
Epoch 2, Batch 400, Loss: 1.4537
Epoch 2, Batch 500, Loss: 1.4752
Epoch 2, Batch 600, Loss: 1.4313
Epoch 2, Batch 700, Loss: 1.4555
Epoch 3, Batch 100, Loss: 1.3802
Epoch 3, Batch 200, Loss: 1.3715
Epoch 3, Batch 300, Loss: 1.3545
Epoch 3, Batch 400, Loss: 1.3522
Epoch 3, Batch 500, Loss: 1.3727
Epoch 3, Batch 600, Loss: 1.3651
Epoch 3, Batch 700, Loss: 1.3363
Epoch 4, Batch 100, Loss: 1.2670
Epoch 4, Batch 200, Loss: 1.2735
Epoch 4, Batch 300, Loss: 1.2957
Epoch 4, Batch 400, Loss: 1.2762
Epoch 4, Batch 500, Loss: 1.2844
Epoch 4, Batch 600, Loss: 1.2847
Epoch 4, Batch 700, Loss: 1.2736
Epoch 5, Batch 100, Loss: 1.1956
Epoch 5, Batch 200, Loss: 1.1910
Epoch 5, B

In [None]:
# Score the current model on the test split in inference mode.
model.eval()
correct, total, total_loss = 0, 0, 0.0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)
        total_loss += loss.item()
        # Predicted class is the index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

accuracy = 100 * correct / total
average_loss = total_loss / len(test_loader)

print(f'Accuracy of the model on the 10000 test images: {accuracy:.2f}%')
print(f'Average loss on the test set: {average_loss:.4f}')

Accuracy of the model on the 10000 test images: 55.45%
Average loss on the test set: 1.3045


**Analysis**

**Model 1:**

**Performance (10 epochs):** Achieved 52.16% accuracy with an average loss of 3.6983 on the test set.

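**Performance (15 additional epochs):** The second training cell continues training the same model rather than re-initialising it, so this corresponds to 25 epochs in total when the notebook is run top to bottom. Accuracy improved slightly to 52.31%, but the average loss increased to 4.4260, suggesting overfitting or that the model has reached its performance capacity on the dataset.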

**Model 2:**

**Performance (10 epochs):** This model shows a notable improvement, reaching 55.45% accuracy with a significantly lower average loss of 1.3045 on the test set.

**Key Takeaways:**

The introduction of dropout and a wider network in Model 2 helped in reducing overfitting and improved model generalization, as evidenced by both higher accuracy and lower loss.

Switching from ReLU to LeakyReLU and from Adam to SGD with momentum also seemed to positively impact the model's ability to learn from the CIFAR-10 dataset, although all of these changes were made at once, so their individual effects cannot be separated from this experiment alone.

These results highlight the significance of architectural choices, activation functions, and optimizers in designing neural networks, especially for complex tasks like image classification.

# CNN

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [None]:
class SimpleCNN(nn.Module):
    """Three conv -> ReLU -> 2x2 max-pool stages followed by a two-layer classifier.

    Every convolution uses 3x3 kernels with padding 1, so only the pooling
    changes the spatial size. A 3x32x32 input therefore reaches the classifier
    as 128 feature maps of 4x4, i.e. 2048 features. The output is 10
    unnormalised class scores per sample.
    """

    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return scores of shape (batch, 10) for inputs of shape (batch, 3, 32, 32).

        Raises:
            RuntimeError: if the spatial size of `x` does not yield 2048
                features per sample after the three pooling stages.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten everything except the batch dimension. Unlike
        # `view(-1, 2048)`, a wrongly sized input now fails in `fc1` instead of
        # being silently reshaped into extra batch rows.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [None]:
# Fresh SimpleCNN trained with Adam (learning rate 1e-3) on cross-entropy.
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean batch loss after every 100 mini-batches.
        running_loss += loss.item()
        if (i + 1) % 100 == 0:
            print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 100:.4f}')
            running_loss = 0.0
print('Finished Training')


Epoch 1, Batch 100, Loss: 1.8995
Epoch 1, Batch 200, Loss: 1.5324
Epoch 1, Batch 300, Loss: 1.3983
Epoch 1, Batch 400, Loss: 1.3119
Epoch 1, Batch 500, Loss: 1.2333
Epoch 1, Batch 600, Loss: 1.2014
Epoch 1, Batch 700, Loss: 1.1278
Epoch 2, Batch 100, Loss: 0.9941
Epoch 2, Batch 200, Loss: 0.9534
Epoch 2, Batch 300, Loss: 0.9209
Epoch 2, Batch 400, Loss: 0.8972
Epoch 2, Batch 500, Loss: 0.8957
Epoch 2, Batch 600, Loss: 0.8583
Epoch 2, Batch 700, Loss: 0.8379
Epoch 3, Batch 100, Loss: 0.7267
Epoch 3, Batch 200, Loss: 0.7316
Epoch 3, Batch 300, Loss: 0.7089
Epoch 3, Batch 400, Loss: 0.7078
Epoch 3, Batch 500, Loss: 0.7004
Epoch 3, Batch 600, Loss: 0.6888
Epoch 3, Batch 700, Loss: 0.7061
Epoch 4, Batch 100, Loss: 0.5640
Epoch 4, Batch 200, Loss: 0.5895
Epoch 4, Batch 300, Loss: 0.5784
Epoch 4, Batch 400, Loss: 0.5726
Epoch 4, Batch 500, Loss: 0.5865
Epoch 4, Batch 600, Loss: 0.5638
Epoch 4, Batch 700, Loss: 0.5478
Epoch 5, Batch 100, Loss: 0.4353
Epoch 5, Batch 200, Loss: 0.4285
Epoch 5, B

In [None]:
# Score the current model on the test split in inference mode.
model.eval()
correct, total, total_loss = 0, 0, 0.0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)
        total_loss += loss.item()
        # Predicted class is the index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

accuracy = 100 * correct / total
average_loss = total_loss / len(test_loader)
print(f'Accuracy of the model on the 10000 test images: {accuracy:.2f}%')
print(f'Average loss on the test set: {average_loss:.4f}')


Accuracy of the model on the 10000 test images: 75.08%
Average loss on the test set: 1.1649


**Trying different parameters**

In [None]:
class UpdatedCNN(nn.Module):
    """Four conv -> ELU -> 2x2 max-pool stages followed by a two-layer classifier.

    The first convolution uses a 5x5 kernel (padding 2) and the others 3x3
    (padding 1), so only the pooling changes the spatial size. A 3x32x32 input
    therefore reaches the classifier as 256 feature maps of 2x2, i.e. 1024
    features. The output is 10 unnormalised class scores per sample.
    """

    def __init__(self):
        super(UpdatedCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(256 * 2 * 2, 1024)
        self.fc2 = nn.Linear(1024, 10)
        self.elu = nn.ELU()

    def forward(self, x):
        """Return scores of shape (batch, 10) for inputs of shape (batch, 3, 32, 32).

        Raises:
            RuntimeError: if the spatial size of `x` does not yield 1024
                features per sample after the four pooling stages.
        """
        x = self.pool(self.elu(self.conv1(x)))
        x = self.pool(self.elu(self.conv2(x)))
        x = self.pool(self.elu(self.conv3(x)))
        x = self.pool(self.elu(self.conv4(x)))
        # Flatten everything except the batch dimension. Unlike
        # `view(-1, 1024)`, a wrongly sized input now fails in `fc1` instead of
        # being silently reshaped into extra batch rows.
        x = torch.flatten(x, 1)
        x = self.elu(self.fc1(x))
        return self.fc2(x)

In [None]:
# Fresh UpdatedCNN trained with SGD (lr 0.01, momentum 0.9) on cross-entropy.
model = UpdatedCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, batch in enumerate(train_loader):
        inputs, labels = batch

        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean batch loss after every 100 mini-batches.
        running_loss += loss.item()
        if (i + 1) % 100 == 0:
            print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 100:.4f}')
            running_loss = 0.0

print('Finished Training')

Epoch 1, Batch 100, Loss: 2.2043
Epoch 1, Batch 200, Loss: 1.9058
Epoch 1, Batch 300, Loss: 1.6636
Epoch 1, Batch 400, Loss: 1.5319
Epoch 1, Batch 500, Loss: 1.4456
Epoch 1, Batch 600, Loss: 1.3909
Epoch 1, Batch 700, Loss: 1.3313
Epoch 2, Batch 100, Loss: 1.1946
Epoch 2, Batch 200, Loss: 1.1452
Epoch 2, Batch 300, Loss: 1.1211
Epoch 2, Batch 400, Loss: 1.0704
Epoch 2, Batch 500, Loss: 1.0342
Epoch 2, Batch 600, Loss: 1.0111
Epoch 2, Batch 700, Loss: 0.9799
Epoch 3, Batch 100, Loss: 0.8553
Epoch 3, Batch 200, Loss: 0.8424
Epoch 3, Batch 300, Loss: 0.8438
Epoch 3, Batch 400, Loss: 0.8589
Epoch 3, Batch 500, Loss: 0.8188
Epoch 3, Batch 600, Loss: 0.8625
Epoch 3, Batch 700, Loss: 0.8189
Epoch 4, Batch 100, Loss: 0.6596
Epoch 4, Batch 200, Loss: 0.6795
Epoch 4, Batch 300, Loss: 0.7023
Epoch 4, Batch 400, Loss: 0.6774
Epoch 4, Batch 500, Loss: 0.6647
Epoch 4, Batch 600, Loss: 0.6734
Epoch 4, Batch 700, Loss: 0.6558
Epoch 5, Batch 100, Loss: 0.5179
Epoch 5, Batch 200, Loss: 0.5272
Epoch 5, B

In [None]:
# Score the current model on the test split in inference mode.
model.eval()
correct, total, total_loss = 0, 0, 0.0
with torch.no_grad():
    for batch in test_loader:
        images, labels = batch
        outputs = model(images)
        total_loss += criterion(outputs, labels).item()
        # Predicted class is the index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

accuracy = 100 * correct / total
average_loss = total_loss / len(test_loader)

print(f'Accuracy of the model on the 10000 test images: {accuracy:.2f}%')
print(f'Average loss on the test set: {average_loss:.4f}')

Accuracy of the model on the 10000 test images: 74.23%
Average loss on the test set: 1.1496


**Analysis**

**Model 1:**

Performance: Achieved an accuracy of 75.08% on the test set with an average loss of 1.1649.

**Model 2:**

Performance: Reached an accuracy of 74.23% on the test set with a slightly lower average loss of 1.1496.

**Key Takeaways:**

Model 2's increased complexity did not lead to higher accuracy compared to Model 1, suggesting that CIFAR-10 may not benefit from the added complexity or that further tuning is needed to leverage the model's full potential.

Model 2 achieved a slightly lower average loss, indicating that, on average, it assigns somewhat more probability to the correct class (or is less confidently wrong) than Model 1, despite not achieving higher accuracy.

Using ELU in Model 2 did not provide a clear performance advantage.

# VGG

In [None]:
import torch

# Tell the reader whether PyTorch can see a CUDA device.
cuda_message = (
    "CUDA is available. GPU will be used for training."
    if torch.cuda.is_available()
    else "CUDA is not available. Training will default to CPU."
)
print(cuda_message)


CUDA is available. GPU will be used for training.


In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader

In [None]:
# Load VGG16 with pretrained weights.
# NOTE(review): newer torchvision releases prefer a `weights=` argument over
# `pretrained=True` — confirm against the installed version before changing.
model = models.vgg16(pretrained=True)

# Freeze the convolutional feature stack so it receives no gradient updates.
model.features.requires_grad_(False)

# Replace the last classifier layer with a new 10-way output layer.
num_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(in_features=num_features, out_features=10)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:06<00:00, 85.7MB/s]


In [None]:
# Inputs for the pretrained VGG16: resize to 224x224, convert to a tensor,
# and normalise with the per-channel mean/std given below.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Training split first, then the held-out test split, both under ./data.
train_dataset, test_dataset = (
    torchvision.datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batches of 64 samples; only the training data is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 63296285.84it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# Cross-entropy objective; only the classifier's parameters are handed to Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.classifier.parameters(), lr=1e-3)

In [None]:
import torch

# Pick the GPU when one is visible, otherwise fall back to the CPU, and move
# the model there.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
print("GPU is available" if use_gpu else "GPU not available, CPU used")

model = model.to(device)

GPU is available


In [None]:
# Optimise the current model for `num_epochs` passes over the training data,
# printing the mean batch loss after every 100 mini-batches.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, batch in enumerate(train_loader):
        # Move the batch to the device selected earlier.
        inputs = batch[0].to(device)
        labels = batch[1].to(device)

        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 100 == 0:
            print(f'Epoch {epoch+1}, Batch {i+1}, Loss: {running_loss / 100:.4f}')
            running_loss = 0.0

print('Finished Training')

Epoch 1, Batch 100, Loss: 0.9353
Epoch 1, Batch 200, Loss: 0.7295
Epoch 1, Batch 300, Loss: 0.6632
Epoch 1, Batch 400, Loss: 0.6559
Epoch 1, Batch 500, Loss: 0.7099
Epoch 1, Batch 600, Loss: 0.6595
Epoch 1, Batch 700, Loss: 0.6725
Epoch 2, Batch 100, Loss: 0.4828
Epoch 2, Batch 200, Loss: 0.4972
Epoch 2, Batch 300, Loss: 0.5199
Epoch 2, Batch 400, Loss: 0.5702
Epoch 2, Batch 500, Loss: 0.5291
Epoch 2, Batch 600, Loss: 0.5521
Epoch 2, Batch 700, Loss: 0.5264
Epoch 3, Batch 100, Loss: 0.4295
Epoch 3, Batch 200, Loss: 0.4136
Epoch 3, Batch 300, Loss: 0.4298
Epoch 3, Batch 400, Loss: 0.4237
Epoch 3, Batch 500, Loss: 0.4343
Epoch 3, Batch 600, Loss: 0.4393
Epoch 3, Batch 700, Loss: 0.4918
Epoch 4, Batch 100, Loss: 0.3033
Epoch 4, Batch 200, Loss: 0.3772
Epoch 4, Batch 300, Loss: 0.4080
Epoch 4, Batch 400, Loss: 0.3804
Epoch 4, Batch 500, Loss: 0.3923
Epoch 4, Batch 600, Loss: 0.4364
Epoch 4, Batch 700, Loss: 0.3975
Epoch 5, Batch 100, Loss: 0.2899
Epoch 5, Batch 200, Loss: 0.3568
Epoch 5, B

In [None]:
# Score the current model on the test split in inference mode.
model.eval()
correct, total, total_loss = 0, 0, 0.0

with torch.no_grad():
    for batch in test_loader:
        # Move the batch to the device selected earlier.
        images = batch[0].to(device)
        labels = batch[1].to(device)
        outputs = model(images)
        total_loss += criterion(outputs, labels).item()
        # Predicted class is the index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

accuracy = 100 * correct / total
average_loss = total_loss / len(test_loader)

print(f'Accuracy of the model on the 10000 test images: {accuracy:.2f}%')
print(f'Average loss on the test set: {average_loss:.4f}')


Accuracy of the model on the 10000 test images: 87.51%
Average loss on the test set: 0.5710


# Conclusions

**Accuracy**

MLP (VariedMLP): Achieved 55.45% accuracy with an average loss of 1.3045.

CNN (SimpleCNN): Achieved 75.08% accuracy with an average loss of 1.1649.

VGG-based Transfer Learning: Achieved 87.51% accuracy with an average loss of 0.5710.

**Analysis**

The MLP model, despite its simplicity and adjustments for a higher capacity, lagged in performance. This is because MLPs treat input images as flat arrays, ignoring the spatial hierarchy and structure within images that are important for effective feature extraction.

The CNN model significantly outperformed the MLP model by leveraging convolutional layers. CNNs maintain the spatial relationships between pixels by applying filters that detect patterns and features.

The VGG-based model, utilizing transfer learning, showed the highest accuracy. This improvement can be attributed to the VGG model's extensive training on ImageNet, a much larger and more diverse dataset.

The VGG model, pre-trained on ImageNet and adapted to CIFAR-10 by training only its classifier layers (the convolutional features were kept frozen), not only achieved higher accuracy but also a significantly lower loss. This indicates better generalization and confidence in predictions compared to models trained from scratch.