In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
    
# Hyper-parameters shared by the rest of the notebook.
batch_size = 64
num_classes = 10
learning_rate = 0.001
num_epochs = 10

# Seed PyTorch's global RNG so weight initialisation and data shuffling are
# repeatable after "Restart Kernel -> Run All".
seed = 42
torch.manual_seed(seed)

# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("Training on device:", device)
print("CUDA is available:", torch.cuda.is_available())
# The CUDA query functions raise on machines without a usable GPU, so only
# call them when CUDA is actually available.
if torch.cuda.is_available():
    print("Current device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(torch.cuda.current_device()))

Training on device: cuda
CUDA is available: True
Current device: 0
Device name: NVIDIA GeForce RTX 4060 Laptop GPU


In [60]:
class LeNet5(nn.Module):
    """LeNet-5 style CNN for 3-channel 32x32 images.

    Layout::

        conv(3->6, 5x5, pad 2) -> ReLU -> avg-pool(2)
        conv(6->16, 5x5)       -> ReLU -> avg-pool(2)
        fc(576->120) -> ReLU -> fc(120->84) -> ReLU -> fc(84->num_classes)

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        # 16 channels x 6 x 6 spatial positions remain after pool2 for 32x32 inputs.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(N, num_classes)``.

        Args:
            x: Image batch of shape ``(N, 3, 32, 32)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not flatten to the
                576 features expected by ``fc1``.
        """
        x = self.pool1(self.relu1(self.conv1(x)))  # (N, 6, 16, 16) for 32x32 input
        x = self.pool2(self.relu2(self.conv2(x)))  # (N, 16, 6, 6)
        # Flatten per sample. A `view(-1, 576)` would silently fold surplus
        # features into the batch dimension for unexpected input sizes.
        x = x.flatten(start_dim=1)
        x = self.relu3(self.fc1(x))
        x = self.relu4(self.fc2(x))
        x = self.fc3(x)
        return x

class LeNet5Dropout(nn.Module):
    """LeNet-5 style CNN with one dropout layer after the first dense layer.

    Identical to the plain LeNet-5 layout, except that dropout is applied to
    the ReLU-activated output of ``fc1`` before it is fed to ``fc2``.

    Args:
        num_classes: Number of output logits produced by the last layer.
        dropout_prob: Drop probability passed to ``nn.Dropout``.
    """

    def __init__(self, num_classes=10, dropout_prob=0.5):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        # 16 channels x 6 x 6 spatial positions remain after pool2 for 32x32 inputs.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.relu3 = nn.ReLU()
        self.dropout = nn.Dropout(dropout_prob)
        self.fc2 = nn.Linear(120, 84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(N, num_classes)``.

        Args:
            x: Image batch of shape ``(N, 3, 32, 32)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not flatten to the
                576 features expected by ``fc1``.
        """
        x = self.pool1(self.relu1(self.conv1(x)))  # (N, 6, 16, 16) for 32x32 input
        x = self.pool2(self.relu2(self.conv2(x)))  # (N, 16, 6, 6)
        # Flatten per sample. A `view(-1, 576)` would silently fold surplus
        # features into the batch dimension for unexpected input sizes.
        x = x.flatten(start_dim=1)
        x = self.relu3(self.fc1(x))
        x = self.dropout(x)
        x = self.relu4(self.fc2(x))
        x = self.fc3(x)
        return x

class LeNet5BatchNorm(nn.Module):
    """LeNet-5 style CNN with one batch-normalization layer.

    Identical to the plain LeNet-5 layout, except that the output of ``fc2``
    is passed through ``BatchNorm1d(84)`` before its ReLU.

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        # 16 channels x 6 x 6 spatial positions remain after pool2 for 32x32 inputs.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(120, 84)
        # The single normalization layer of this variant; it keeps the name
        # `bn4` so existing state_dict keys stay valid.
        self.bn4 = nn.BatchNorm1d(84)
        self.relu4 = nn.ReLU()
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(N, num_classes)``.

        Args:
            x: Image batch of shape ``(N, 3, 32, 32)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not flatten to the
                576 features expected by ``fc1``.
        """
        x = self.pool1(self.relu1(self.conv1(x)))  # (N, 6, 16, 16) for 32x32 input
        x = self.pool2(self.relu2(self.conv2(x)))  # (N, 16, 6, 6)
        # Flatten per sample. A `view(-1, 576)` would silently fold surplus
        # features into the batch dimension for unexpected input sizes.
        x = x.flatten(start_dim=1)
        x = self.relu3(self.fc1(x))
        x = self.relu4(self.bn4(self.fc2(x)))
        x = self.fc3(x)
        return x

In [26]:
# Smoke test: push one random 3x32x32 image through a freshly built LeNet5.
model = LeNet5()
dummy_input = torch.randn(1, 3, 32, 32)  # (batch=1, channels=3, height=32, width=32)
logits = model(dummy_input)
print(logits.shape)  # expected: torch.Size([1, 10])

torch.Size([1, 10])


In [55]:
# Load CIFAR-10 dataset
# Images are converted to tensors and normalised per channel.
# NOTE(review): the mean/std constants are presumably the CIFAR-10 training-set
# statistics — confirm against the source they were taken from.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])

# Both loaders use the notebook-wide `batch_size` from the configuration cell
# instead of repeating the literal 64.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=batch_size,
                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = DataLoader(testset, batch_size=batch_size,
                         shuffle=False, num_workers=2)

# Define the loss function (each experiment cell creates its own optimizer)
criterion = nn.CrossEntropyLoss()

In [56]:
def train_network(model, trainloader, criterion, optimizer, num_epochs=20, device='cuda'):
    """Train ``model`` in place and print the mean batch loss after each epoch.

    Args:
        model: Network to optimise; it is moved to ``device`` and switched to
            training mode.
        trainloader: Iterable of batches whose first two items are the inputs
            and the target labels.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that holds ``model``'s parameters.
        num_epochs: Number of passes over ``trainloader``.
        device: Device the model and every batch are moved to.

    Returns:
        None. Progress is reported through ``tqdm`` and ``print``.
    """
    model.to(device)
    # Make sure dropout / batch-norm layers behave in training mode, even if
    # the caller left the model in eval mode.
    model.train()
    for epoch in tqdm(range(num_epochs), desc="Epochs"):
        epoch_loss = 0.0
        num_batches = 0
        for data in trainloader:
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            num_batches += 1
        # Count the batches actually seen instead of calling len(): this also
        # works for loaders without a length and avoids dividing by zero when
        # the loader is empty.
        mean_loss = epoch_loss / num_batches if num_batches else float('nan')
        print(f'Epoch {epoch + 1} loss: {mean_loss:.3f}')
    print('Finished Training')

def evaluate_network(model, testloader, device='cuda'):
    """Compute and print the top-1 accuracy of ``model`` on ``testloader``.

    The model is evaluated in eval mode without gradient tracking; afterwards
    it is put back into whichever mode (train/eval) it was in on entry.

    Args:
        model: Network to evaluate; it is moved to ``device``.
        testloader: Iterable of batches whose first two items are the inputs
            and the target labels.
        device: Device the model and every batch are moved to.

    Returns:
        Accuracy in percent (0-100) as a float.

    Raises:
        ValueError: If ``testloader`` yields no samples.
    """
    was_training = model.training
    # Mirror train_network: make sure model and data live on the same device.
    model.to(device)
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for data in testloader:
                images, labels = data[0].to(device), data[1].to(device)
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Restore the caller's mode instead of unconditionally forcing train().
        model.train(was_training)
    if total == 0:
        raise ValueError('testloader yielded no samples; accuracy is undefined')
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy

In [57]:
# 1) Baseline: plain LeNet-5 with neither dropout nor batch normalization.
# NOTE(review): lr=0.002 differs from the `learning_rate` constant (0.001)
# declared in the configuration cell — confirm which value is intended.
print("\n--- Training LeNet-5 without Dropout/BatchNorm ---")
model_plain = LeNet5()
optimizer_plain = optim.Adam(params=model_plain.parameters(), lr=0.002)
train_network(
    model=model_plain,
    trainloader=trainloader,
    criterion=criterion,
    optimizer=optimizer_plain,
    num_epochs=10,
    device=device,
)
# Trailing expression: the notebook echoes the returned test accuracy.
evaluate_network(model=model_plain, testloader=testloader, device=device)


--- Training LeNet-5 without Dropout/BatchNorm ---


Epochs:  10%|███▌                                | 1/10 [00:02<00:23,  2.61s/it]

Epoch 1 loss: 1.568


Epochs:  20%|███████▏                            | 2/10 [00:05<00:20,  2.53s/it]

Epoch 2 loss: 1.287


Epochs:  30%|██████████▊                         | 3/10 [00:07<00:17,  2.57s/it]

Epoch 3 loss: 1.154


Epochs:  40%|██████████████▍                     | 4/10 [00:10<00:15,  2.66s/it]

Epoch 4 loss: 1.059


Epochs:  50%|██████████████████                  | 5/10 [00:13<00:13,  2.63s/it]

Epoch 5 loss: 0.989


Epochs:  60%|█████████████████████▌              | 6/10 [00:15<00:10,  2.62s/it]

Epoch 6 loss: 0.926


Epochs:  70%|█████████████████████████▏          | 7/10 [00:18<00:08,  2.70s/it]

Epoch 7 loss: 0.876


Epochs:  80%|████████████████████████████▊       | 8/10 [00:21<00:05,  2.70s/it]

Epoch 8 loss: 0.823


Epochs:  90%|████████████████████████████████▍   | 9/10 [00:23<00:02,  2.70s/it]

Epoch 9 loss: 0.783


Epochs: 100%|███████████████████████████████████| 10/10 [00:26<00:00,  2.67s/it]

Epoch 10 loss: 0.743
Finished Training





Accuracy: 63.05%


63.05

In [58]:
# 2) Variant with a single dropout layer (p=0.5) added to the network.
# NOTE(review): lr=0.002 differs from the `learning_rate` constant (0.001)
# declared in the configuration cell — confirm which value is intended.
print("\n--- Training LeNet-5 with Dropout ---")
model_dropout = LeNet5Dropout(dropout_prob=0.5)
optimizer_dropout = optim.Adam(params=model_dropout.parameters(), lr=0.002)
train_network(
    model=model_dropout,
    trainloader=trainloader,
    criterion=criterion,
    optimizer=optimizer_dropout,
    num_epochs=10,
    device=device,
)
# Trailing expression: the notebook echoes the returned test accuracy.
evaluate_network(model=model_dropout, testloader=testloader, device=device)


--- Training LeNet-5 with Dropout ---


Epochs:  10%|███▌                                | 1/10 [00:02<00:23,  2.59s/it]

Epoch 1 loss: 1.702


Epochs:  20%|███████▏                            | 2/10 [00:05<00:21,  2.63s/it]

Epoch 2 loss: 1.441


Epochs:  30%|██████████▊                         | 3/10 [00:07<00:18,  2.68s/it]

Epoch 3 loss: 1.332


Epochs:  40%|██████████████▍                     | 4/10 [00:10<00:16,  2.70s/it]

Epoch 4 loss: 1.269


Epochs:  50%|██████████████████                  | 5/10 [00:13<00:13,  2.72s/it]

Epoch 5 loss: 1.219


Epochs:  60%|█████████████████████▌              | 6/10 [00:16<00:11,  2.81s/it]

Epoch 6 loss: 1.179


Epochs:  70%|█████████████████████████▏          | 7/10 [00:19<00:08,  2.82s/it]

Epoch 7 loss: 1.144


Epochs:  80%|████████████████████████████▊       | 8/10 [00:22<00:05,  2.80s/it]

Epoch 8 loss: 1.128


Epochs:  90%|████████████████████████████████▍   | 9/10 [00:24<00:02,  2.77s/it]

Epoch 9 loss: 1.097


Epochs: 100%|███████████████████████████████████| 10/10 [00:27<00:00,  2.76s/it]

Epoch 10 loss: 1.082
Finished Training





Accuracy: 61.97%


61.97

In [61]:
# 3) Variant with a single batch-normalization layer added to the network.
# NOTE(review): lr=0.002 differs from the `learning_rate` constant (0.001)
# declared in the configuration cell — confirm which value is intended.
print("\n--- Training LeNet-5 with Batch Normalization ---")
model_bn = LeNet5BatchNorm()
optimizer_bn = optim.Adam(params=model_bn.parameters(), lr=0.002)
train_network(
    model=model_bn,
    trainloader=trainloader,
    criterion=criterion,
    optimizer=optimizer_bn,
    num_epochs=10,
    device=device,
)
# Trailing expression: the notebook echoes the returned test accuracy.
evaluate_network(model=model_bn, testloader=testloader, device=device)


--- Training LeNet-5 with Batch Normalization ---


Epochs:  10%|███▌                                | 1/10 [00:02<00:26,  2.97s/it]

Epoch 1 loss: 1.400


Epochs:  20%|███████▏                            | 2/10 [00:05<00:23,  2.93s/it]

Epoch 2 loss: 1.121


Epochs:  30%|██████████▊                         | 3/10 [00:08<00:20,  2.95s/it]

Epoch 3 loss: 1.023


Epochs:  40%|██████████████▍                     | 4/10 [00:11<00:17,  2.84s/it]

Epoch 4 loss: 0.950


Epochs:  50%|██████████████████                  | 5/10 [00:14<00:14,  2.80s/it]

Epoch 5 loss: 0.895


Epochs:  60%|█████████████████████▌              | 6/10 [00:16<00:11,  2.78s/it]

Epoch 6 loss: 0.850


Epochs:  70%|█████████████████████████▏          | 7/10 [00:19<00:08,  2.77s/it]

Epoch 7 loss: 0.809


Epochs:  80%|████████████████████████████▊       | 8/10 [00:22<00:05,  2.75s/it]

Epoch 8 loss: 0.774


Epochs:  90%|████████████████████████████████▍   | 9/10 [00:25<00:02,  2.76s/it]

Epoch 9 loss: 0.734


Epochs: 100%|███████████████████████████████████| 10/10 [00:28<00:00,  2.80s/it]

Epoch 10 loss: 0.704
Finished Training





Accuracy: 68.32%


68.32