# Step 1 Loading and Preprocessing the CIFAR-10 Dataset

In [16]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Data augmentation and normalization for the training set
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # Randomly flip images
    transforms.RandomCrop(32, padding=4),  # Randomly crop images
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize pixel values
])

# Only normalization for the validation/test set
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = DataLoader(testset, batch_size=64, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified


# Step 2: Building a Simple CNN in PyTorch

In [17]:
import torch.nn as nn
import torch.nn.functional as F

# Define a simple CNN
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)  # Convolutional layer 1
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)  # Convolutional layer 2
        self.pool = nn.MaxPool2d(2, 2)  # Max pooling layer
        self.fc1 = nn.Linear(64 * 8 * 8, 512)  # Fully connected layer 1
        self.fc2 = nn.Linear(512, 10)  # Output layer for 10 classes (CIFAR-10)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # Apply conv1, ReLU, and pooling
        x = self.pool(F.relu(self.conv2(x)))  # Apply conv2, ReLU, and pooling
        x = x.view(-1, 64 * 8 * 8)  # Flatten the feature maps
        x = F.relu(self.fc1(x))  # Apply fully connected layer
        x = self.fc2(x)  # Output layer
        return x

# Instantiate the model, loss function, and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


# Step 3: Training the CNN

In [18]:
# Training loop
for epoch in range(10):  # Train for 10 epochs
    running_loss = 0.0
    model.train()  # Set the model to training mode
    for images, labels in trainloader:
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(images)  # Forward pass
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update model parameters

        running_loss += loss.item()
    
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader)}")


Epoch 1, Loss: 1.505565504741181
Epoch 2, Loss: 1.1579010714502895
Epoch 3, Loss: 1.0090941661763984
Epoch 4, Loss: 0.9180591698650205
Epoch 5, Loss: 0.8540969910219197
Epoch 6, Loss: 0.8055113941964591
Epoch 7, Loss: 0.7634095426486887
Epoch 8, Loss: 0.7283952946934249
Epoch 9, Loss: 0.7035878071623385
Epoch 10, Loss: 0.6811098975446218


# Step 4: Evaluating the CNN on the Validation/Test Set

In [19]:
correct = 0
total = 0
model.eval()  # Set the model to evaluation mode (no backpropagation)
with torch.no_grad():  # Disable gradient calculation during evaluation
    for images, labels in testloader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # Get the predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on the test set: {100 * correct / total}%')


Accuracy on the test set: 75.88%


# Step 5: Adding Regularization Techniques

In [20]:
# CNN with Dropout
class CNNWithDropout(nn.Module):
    def __init__(self):
        super(CNNWithDropout, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.dropout = nn.Dropout(0.5)  # Dropout layer with 50% dropout rate
        self.fc2 = nn.Linear(512, 10)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # Apply dropout
        x = self.fc2(x)
        return x

# Instantiate the model with dropout, loss function, and optimizer
model = CNNWithDropout()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [21]:
# Training loop
for epoch in range(10):  # Train for 10 epochs
    running_loss = 0.0
    model.train()  # Set the model to training mode
    for images, labels in trainloader:
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(images)  # Forward pass
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update model parameters

        running_loss += loss.item()
    
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader)}")


Epoch 1, Loss: 1.5791191931270883
Epoch 2, Loss: 1.2593676598023271
Epoch 3, Loss: 1.1256146037670047
Epoch 4, Loss: 1.04739775933573
Epoch 5, Loss: 0.9940367937850221
Epoch 6, Loss: 0.9538737825877831
Epoch 7, Loss: 0.9179612662633667
Epoch 8, Loss: 0.8870001133445584
Epoch 9, Loss: 0.8666087887095063
Epoch 10, Loss: 0.8456562100469


In [22]:
correct = 0
total = 0
model.eval()  # Set the model to evaluation mode (no backpropagation)
with torch.no_grad():  # Disable gradient calculation during evaluation
    for images, labels in testloader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # Get the predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on the test set: {100 * correct / total}%')


Accuracy on the test set: 74.8%


# Step 6: Transfer Learning

In [23]:
from torchvision import models

# Load a pre-trained ResNet model
model = models.resnet18(pretrained=True)

# Modify the last fully connected layer to match the 10 classes of CIFAR-10
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)


In [24]:
# # Freeze all layers except the last layer
# for param in model.parameters():
#     param.requires_grad = False

# # Allow training of the final fully connected layer
# for param in model.fc.parameters():
#     param.requires_grad = True


In [25]:
# Use a lower learning rate for fine-tuning
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.0001)

# Fine-tuning loop
for epoch in range(10):
    running_loss = 0.0
    model.train()
    for images, labels in trainloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader)}")


Epoch 1, Loss: 2.1202150810405116
Epoch 2, Loss: 1.8636915942896968
Epoch 3, Loss: 1.7885658609897583
Epoch 4, Loss: 1.7551391423510774
Epoch 5, Loss: 1.7341895911394787
Epoch 6, Loss: 1.7162512421912854
Epoch 7, Loss: 1.6983285474655267
Epoch 8, Loss: 1.6967481981457957
Epoch 9, Loss: 1.687846164111896
Epoch 10, Loss: 1.6888907362737924


In [26]:
correct = 0
total = 0
model.eval()  # Set the model to evaluation mode (no backpropagation)
with torch.no_grad():  # Disable gradient calculation during evaluation
    for images, labels in testloader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # Get the predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on the test set: {100 * correct / total}%')


Accuracy on the test set: 40.17%


# Implementing LeNet

## Step 1: Implementing LeNet

In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        # Convolutional Layers
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5, padding=2)  # Output: 32x32 -> 32x32
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)             # Output: 32x32 -> 28x28 -> 14x14

        # Update the input size for the first fully connected layer
        self.fc1 = nn.Linear(16 * 6 * 6, 120)  # Changed from 16 * 5 * 5 to 16 * 6 * 6
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        
        # Check the shape before flattening
        #print(x.shape)  # This will now output (batch_size, 16, 6, 6)
        
        x = x.view(x.size(0), -1)  # Automatically infers correct dimensions
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Instantiate LeNet
lenet_model = LeNet()
print(lenet_model)

LeNet(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [28]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class EnhancedLeNet(nn.Module):
    def __init__(self):
        super(EnhancedLeNet, self).__init__()
        # Convolutional Layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)  # Output: 32x32 -> 32x32
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)            # Output: 32x32 -> 28x28 -> 14x14

        # Fully Connected Layers
        self.fc1 = nn.Linear(64 * 6 * 6, 512)  # Adjusted for the output of conv2
        self.fc2 = nn.Linear(512, 120)
        self.fc3 = nn.Linear(120, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        
        # Check the shape before flattening
        #print(x.shape)  # Debugging
        
        x = x.view(x.size(0), -1)  # Automatically infers correct dimensions
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Instantiate the enhanced LeNet
lenet_model = EnhancedLeNet()


## Step 2: Training LeNet

In [29]:
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Data preparation (using CIFAR-10 as in the main chapter)
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(lenet_model.parameters(), lr=0.01, momentum=0.9)

# Training loop
epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    lenet_model.train()
    for images, labels in trainloader:
        optimizer.zero_grad()
        outputs = lenet_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {running_loss / len(trainloader)}")


Files already downloaded and verified
Files already downloaded and verified
Epoch 1/10, Loss: 1.7900719453611642
Epoch 2/10, Loss: 1.3896684212910244
Epoch 3/10, Loss: 1.17953988871611
Epoch 4/10, Loss: 1.0566183709732406
Epoch 5/10, Loss: 0.9646655843233514
Epoch 6/10, Loss: 0.8920416496598812
Epoch 7/10, Loss: 0.839506528719002
Epoch 8/10, Loss: 0.8036490207742852
Epoch 9/10, Loss: 0.7592372316533648
Epoch 10/10, Loss: 0.7243672685168893


## Step 3: Evaluating LeNet on the Test Set

In [30]:
correct = 0
total = 0
lenet_model.eval()
with torch.no_grad():
    for images, labels in testloader:
        outputs = lenet_model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"LeNet Accuracy on CIFAR-10 test set: {100 * correct / total:.2f}%")


LeNet Accuracy on CIFAR-10 test set: 73.86%
