<a href="https://colab.research.google.com/github/suhas-bvp/session6/blob/master/experiment1_MNIST_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

In [None]:
# 1. Define the CNN Model
class SimpleCNN(nn.Module):
    """Compact CNN classifier that returns per-class log-probabilities.

    ``features`` stacks five 3x3 conv -> BatchNorm -> ReLU stages; three of
    them end in a 2x2 max-pool, so a 1x28x28 input (MNIST) shrinks
    28 -> 14 -> 7 -> 3 and leaves a 16x3x3 feature map.  ``classifier`` is a
    single linear layer mapping those 144 features to 10 classes.

    The order of layers inside ``features`` is deliberately left untouched so
    the ``state_dict`` keys (``features.0.weight`` ...) do not change.
    """

    def __init__(self):
        super().__init__()

        self.features = nn.Sequential(
            # Stage 1: 1 -> 8 channels, then pool (28x28 -> 14x14)
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 2: 8 -> 10 channels, no pooling (stays 14x14)
            nn.Conv2d(in_channels=8, out_channels=10, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(10),
            nn.ReLU(),
            nn.Dropout2d(0.01),

            # Stage 3: 10 -> 16 channels, then pool (14x14 -> 7x7)
            nn.Conv2d(in_channels=10, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(0.05),

            # Stage 4: 16 -> 16 channels, no pooling (stays 7x7)
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout2d(0.05),

            # Stage 5: 16 -> 16 channels, then pool (7x7 -> 3x3, floor division)
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(0.1),
        )

        self.classifier = nn.Sequential(
            nn.Linear(16 * 3 * 3, 10)  # 144 flattened features -> 10 class scores
        )

        # Not used by forward(); kept so the attribute remains available to
        # existing code.  AvgPool2d holds no parameters, so it does not affect
        # the parameter count or the state_dict.
        self.gap = nn.Sequential(
            nn.AvgPool2d(kernel_size=3)
        )

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10) for input ``x``.

        ``x`` must produce a 16x3x3 feature map (e.g. a 1x28x28 image);
        any other spatial size raises a shape error in the linear layer.
        """
        x = self.features(x)
        # Flatten per sample.  A fixed ``view(-1, 144)`` would silently fold
        # a wrongly-sized feature map into a different batch size instead of
        # failing, which hides input-shape mistakes.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return F.log_softmax(x, dim=1)


In [None]:

# 2. Load and Prepare Data
def get_mnist_dataloaders(batch_size=64):
    """Build the MNIST train and test ``DataLoader`` objects.

    Images are converted to tensors and normalised with mean 0.1307 and
    std 0.3081.  The dataset is downloaded into ``./data`` when missing.

    Args:
        batch_size: Number of samples per batch for both loaders.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    def _make_loader(is_train):
        # The training split is the only one that gets shuffled.
        split = datasets.MNIST('./data', train=is_train, download=True, transform=preprocessing)
        return DataLoader(split, batch_size=batch_size, shuffle=is_train)

    return _make_loader(True), _make_loader(False)


In [None]:

# 3. Training Function
def train_model(model, train_loader, optimizer, criterion, device, epochs=5, test_loader=None):
    """Train ``model`` and print a one-line summary after every epoch.

    Args:
        model: Network to optimise.  Only the batches are moved to
            ``device`` here, not the model.
        train_loader: Iterable of ``(data, target)`` batches supporting ``len()``.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        device: Device the batches are moved to.
        epochs: Number of passes over ``train_loader``.
        test_loader: Optional loader used for the per-epoch test accuracy.
            When omitted, a module-level ``test_loader`` is used if one
            exists (the behaviour earlier callers relied on); otherwise the
            evaluation step is skipped.

    Returns:
        None.  Progress is reported through ``print``.
    """
    # Backward-compatible fallback to the module-level loader.
    eval_loader = test_loader if test_loader is not None else globals().get("test_loader")
    num_batches = len(train_loader)

    for epoch in range(epochs):
        # Re-enter training mode at the start of EVERY epoch.  The evaluation
        # pass at the end of an epoch may leave the model in eval mode, which
        # would otherwise disable dropout and freeze BatchNorm statistics for
        # all later epochs.
        model.train()

        running_loss = 0.0
        total_samples = 0
        correct_predictions = 0

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()              # Clear gradients
            output = model(data)               # Forward pass
            loss = criterion(output, target)   # Calculate loss
            loss.backward()                    # Backward pass (compute gradients)
            optimizer.step()                   # Update weights
            running_loss += loss.item()

            # Running accuracy: class with the highest score per sample
            predicted = output.detach().argmax(dim=1)
            total_samples += target.size(0)
            correct_predictions += (predicted == target).sum().item()

        # Guard against empty loaders instead of dividing by zero.
        epoch_accuracy = (correct_predictions / total_samples) * 100 if total_samples else 0.0
        average_loss = running_loss / num_batches if num_batches else 0.0

        summary = (
            f'Epoch {epoch+1} finished. Average Loss: {average_loss:.4f} '
            f'&& Training Accuracy: {epoch_accuracy:.2f}%.'
        )
        if eval_loader is not None:
            # Evaluate the model after each epoch
            test_accuracy = evaluate_model(model, eval_loader, device)
            summary += f' Test Accuracy: {test_accuracy:.2f}'
        print(summary)




# 4. Evaluation Function
def evaluate_model(model, test_loader, device):
    """Return the top-1 accuracy of ``model`` on ``test_loader`` in percent.

    The model is switched to eval mode for the duration of the pass and then
    returned to whatever mode it was in before the call, so evaluating in the
    middle of training does not leave dropout/BatchNorm disabled afterwards.

    Args:
        model: Network to evaluate.
        test_loader: Iterable of ``(data, target)`` batches.
        device: Device the batches are moved to.

    Returns:
        Accuracy in the range 0-100; ``0.0`` when the loader yields no samples.
    """
    was_training = model.training
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    try:
        with torch.no_grad():  # Disable gradient calculation during evaluation
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                predicted = output.argmax(dim=1)  # Index of the max log-probability
                total += target.size(0)
                correct += (predicted == target).sum().item()
    finally:
        # Restore the caller's mode even if a batch raised.
        model.train(was_training)

    if total == 0:
        return 0.0
    return 100 * correct / total

# 5. Parameter Count Function
def count_parameters(model):
    """Return the total element count of ``model``'s trainable parameters.

    Parameters whose ``requires_grad`` flag is false are not counted.
    """
    trainable_total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        trainable_total += param.numel()
    return trainable_total



In [None]:

# Main Execution Block
if __name__ == "__main__":
    # NOTE: this script body intentionally stays at module scope.
    # train_model() looks up ``test_loader`` as a module-level name for its
    # per-epoch evaluation, so the loaders must not become function locals.

    # Check for GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Hyperparameters
    BATCH_SIZE = 64
    LEARNING_RATE = 0.01
    EPOCHS = 20

    # Get data loaders
    train_loader, test_loader = get_mnist_dataloaders(BATCH_SIZE)

    # Initialize model
    model = SimpleCNN().to(device)

    # Print parameter count
    param_count = count_parameters(model)
    print(f"Total trainable parameters: {param_count}")

    # Define loss function and optimizer.
    # SimpleCNN.forward() already returns log-probabilities (log_softmax),
    # so the matching criterion is NLLLoss.  CrossEntropyLoss expects raw
    # logits and would apply log_softmax a second time.
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

    # Train the model
    print("\nStarting training...")
    train_model(model, train_loader, optimizer, criterion, device, EPOCHS)
    print("Training complete.")


Using device: cuda
Total trainable parameters: 8488

Starting training...
Epoch 1 finished. Average Loss: 0.4201 && Training Accuracy: 89.27%. Test Accuracy: 96.73
Epoch 2 finished. Average Loss: 0.0844 && Training Accuracy: 97.42%. Test Accuracy: 98.40
Epoch 3 finished. Average Loss: 0.0571 && Training Accuracy: 98.25%. Test Accuracy: 98.35
Epoch 4 finished. Average Loss: 0.0446 && Training Accuracy: 98.59%. Test Accuracy: 98.99
Epoch 5 finished. Average Loss: 0.0375 && Training Accuracy: 98.80%. Test Accuracy: 98.82
Epoch 6 finished. Average Loss: 0.0314 && Training Accuracy: 99.00%. Test Accuracy: 98.68
Epoch 7 finished. Average Loss: 0.0277 && Training Accuracy: 99.11%. Test Accuracy: 98.70
Epoch 8 finished. Average Loss: 0.0249 && Training Accuracy: 99.19%. Test Accuracy: 99.01
Epoch 9 finished. Average Loss: 0.0218 && Training Accuracy: 99.30%. Test Accuracy: 99.02
Epoch 10 finished. Average Loss: 0.0203 && Training Accuracy: 99.34%. Test Accuracy: 98.90
Epoch 11 finished. Averag