## MNIST-V1: Minimal Model

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm 

# Load MNIST data
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Minimal model: Single linear layer
class MinimalModel(nn.Module):
    def __init__(self):
        super(MinimalModel, self).__init__()
        self.fc = nn.Linear(28 * 28, 10)  # Fully connected layer

    def forward(self, x):
        x = x.view(-1, 28 * 28)  # Flatten the input
        x = self.fc(x)
        return x

model = MinimalModel()

device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
total_parameters = sum(p.numel() for p in model.parameters())
print(f"Total Parameters : {total_parameters}")
print(model)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

def train_and_evaluate(model, device, train_loader, test_loader, epochs):
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        # Training loop with progress bar
        with tqdm(train_loader, unit="batch") as tepoch:
            tepoch.set_description(f"Epoch {epoch + 1}/{epochs}")
            for images, labels in tepoch:
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                # Update tqdm bar
                tepoch.set_postfix(loss=train_loss / total, accuracy=100.0 * correct / total)

        train_accuracy = 100.0 * correct / total
        print(f"Training Loss: {train_loss / len(train_loader):.4f}, Training Accuracy: {train_accuracy:.2f}%")

        # Evaluate on the test set
        model.eval()
        test_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in tqdm(test_loader, desc="Testing", unit="batch"):
                outputs = model(images)
                loss = criterion(outputs, labels)

                test_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        test_accuracy = 100.0 * correct / total
        print(f"Test Loss: {test_loss / len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%\n")

# Train and evaluate the model
train_and_evaluate(model, device, train_loader, test_loader, epochs=2)


Total Parameters : 7850
MinimalModel(
  (fc): Linear(in_features=784, out_features=10, bias=True)
)


Epoch 1/2: 100%|██████████| 938/938 [00:02<00:00, 332.94batch/s, accuracy=80.2, loss=0.0153]


Training Loss: 0.9800, Training Accuracy: 80.17%


Testing: 100%|██████████| 157/157 [00:00<00:00, 620.94batch/s]


Test Loss: 0.6015, Test Accuracy: 86.68%



Epoch 2/2: 100%|██████████| 938/938 [00:02<00:00, 333.09batch/s, accuracy=86.6, loss=0.00863]


Training Loss: 0.5520, Training Accuracy: 86.64%


Testing: 100%|██████████| 157/157 [00:00<00:00, 601.28batch/s]

Test Loss: 0.4769, Test Accuracy: 88.46%






### MNIST-V2: 2-Layer Model, Add RELU

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm 

# Load MNIST data
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Minimal model: Single linear layer
class MinimalModel(nn.Module):
    def __init__(self):
        super(MinimalModel, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 128)  # Hidden layer
        self.fc2 = nn.Linear(128, 10)      # Output layer

    def forward(self, x):
        x = x.view(-1, 28 * 28)  # Flatten the input
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = MinimalModel()

device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
total_parameters = sum(p.numel() for p in model.parameters())
print(f"Total Parameters : {total_parameters}")
print(model)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

def train_and_evaluate(model, device, train_loader, test_loader, epochs):
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        # Training loop with progress bar
        with tqdm(train_loader, unit="batch") as tepoch:
            tepoch.set_description(f"Epoch {epoch + 1}/{epochs}")
            for images, labels in tepoch:
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                # Update tqdm bar
                tepoch.set_postfix(loss=train_loss / total, accuracy=100.0 * correct / total)

        train_accuracy = 100.0 * correct / total
        print(f"Training Loss: {train_loss / len(train_loader):.4f}, Training Accuracy: {train_accuracy:.2f}%")

        # Evaluate on the test set
        model.eval()
        test_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in tqdm(test_loader, desc="Testing", unit="batch"):
                outputs = model(images)
                loss = criterion(outputs, labels)

                test_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        test_accuracy = 100.0 * correct / total
        print(f"Test Loss: {test_loss / len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%\n")

# Train and evaluate the model
train_and_evaluate(model, device, train_loader, test_loader, epochs=2)


Total Parameters : 101770
MinimalModel(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


Epoch 1/2: 100%|██████████| 938/938 [00:03<00:00, 297.64batch/s, accuracy=73.9, loss=0.0195]


Training Loss: 1.2452, Training Accuracy: 73.95%


Testing: 100%|██████████| 157/157 [00:00<00:00, 542.25batch/s]


Test Loss: 0.5909, Test Accuracy: 86.08%



Epoch 2/2: 100%|██████████| 938/938 [00:03<00:00, 306.66batch/s, accuracy=87.2, loss=0.00781]


Training Loss: 0.4996, Training Accuracy: 87.23%


Testing: 100%|██████████| 157/157 [00:00<00:00, 537.35batch/s]

Test Loss: 0.4087, Test Accuracy: 89.13%






### MNIST-V3: Add Convolution Layer

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm 

# Load MNIST data
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Minimal model: Single linear layer
class MinimalModel(nn.Module):
    def __init__(self):
        super(MinimalModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(16 * 28 * 28, 128)  # Hidden layer
        self.fc2 = nn.Linear(128, 10)      # Output layer

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = x.view(x.size(0), -1)  # Flatten the input
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = MinimalModel()

device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
total_parameters = sum(p.numel() for p in model.parameters())
print(f"Total Parameters : {total_parameters}")
print(model)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

def train_and_evaluate(model, device, train_loader, test_loader, epochs):
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        # Training loop with progress bar
        with tqdm(train_loader, unit="batch") as tepoch:
            tepoch.set_description(f"Epoch {epoch + 1}/{epochs}")
            for images, labels in tepoch:
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                # Update tqdm bar
                tepoch.set_postfix(loss=train_loss / total, accuracy=100.0 * correct / total)

        train_accuracy = 100.0 * correct / total
        print(f"Training Loss: {train_loss / len(train_loader):.4f}, Training Accuracy: {train_accuracy:.2f}%")

        # Evaluate on the test set
        model.eval()
        test_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in tqdm(test_loader, desc="Testing", unit="batch"):
                outputs = model(images)
                loss = criterion(outputs, labels)

                test_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        test_accuracy = 100.0 * correct / total
        print(f"Test Loss: {test_loss / len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%\n")

# Train and evaluate the model
train_and_evaluate(model, device, train_loader, test_loader, epochs=2)


Total Parameters : 1607210
MinimalModel(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=12544, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


Epoch 1/2: 100%|██████████| 938/938 [00:09<00:00, 100.92batch/s, accuracy=85.5, loss=0.00895]


Training Loss: 0.5727, Training Accuracy: 85.50%


Testing: 100%|██████████| 157/157 [00:00<00:00, 254.91batch/s]


Test Loss: 0.2912, Test Accuracy: 91.62%



Epoch 2/2: 100%|██████████| 938/938 [00:09<00:00, 101.59batch/s, accuracy=91.8, loss=0.00434]


Training Loss: 0.2779, Training Accuracy: 91.84%


Testing: 100%|██████████| 157/157 [00:00<00:00, 273.96batch/s]

Test Loss: 0.2329, Test Accuracy: 93.04%






### MNIST-V4: Add pooling and more convolution layer

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm 

# Load MNIST data
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Minimal model: Single linear layer
class MinimalModel(nn.Module):
    def __init__(self):
        super(MinimalModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)  # Reduce size by half
        self.fc1 = nn.Linear(32 * 14 * 14, 128)  # Hidden layer
        self.fc2 = nn.Linear(128, 10)      # Output layer

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))  # Apply pooling
        x = x.view(x.size(0), -1)  # Flatten the input
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = MinimalModel()

device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
total_parameters = sum(p.numel() for p in model.parameters())
print(f"Total Parameters : {total_parameters}")
print(model)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

def train_and_evaluate(model, device, train_loader, test_loader, epochs):
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        # Training loop with progress bar
        with tqdm(train_loader, unit="batch") as tepoch:
            tepoch.set_description(f"Epoch {epoch + 1}/{epochs}")
            for images, labels in tepoch:
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                # Update tqdm bar
                tepoch.set_postfix(loss=train_loss / total, accuracy=100.0 * correct / total)

        train_accuracy = 100.0 * correct / total
        print(f"Training Loss: {train_loss / len(train_loader):.4f}, Training Accuracy: {train_accuracy:.2f}%")

        # Evaluate on the test set
        model.eval()
        test_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in tqdm(test_loader, desc="Testing", unit="batch"):
                outputs = model(images)
                loss = criterion(outputs, labels)

                test_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        test_accuracy = 100.0 * correct / total
        print(f"Test Loss: {test_loss / len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%\n")

# Train and evaluate the model
train_and_evaluate(model, device, train_loader, test_loader, epochs=2)


Total Parameters : 809034
MinimalModel(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=6272, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


Epoch 1/2: 100%|██████████| 938/938 [00:29<00:00, 31.82batch/s, accuracy=75.8, loss=0.0136]


Training Loss: 0.8699, Training Accuracy: 75.79%


Testing: 100%|██████████| 157/157 [00:01<00:00, 99.71batch/s] 


Test Loss: 0.2812, Test Accuracy: 91.33%



Epoch 2/2: 100%|██████████| 938/938 [00:29<00:00, 31.59batch/s, accuracy=92.6, loss=0.00383]


Training Loss: 0.2452, Training Accuracy: 92.61%


Testing: 100%|██████████| 157/157 [00:01<00:00, 92.86batch/s]

Test Loss: 0.2580, Test Accuracy: 91.62%






## Final Version

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm  # For progress bars

# Load MNIST data
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Final CNN Model with Batch Normalization and Dropout
class FinalCNNModel(nn.Module):
    def __init__(self):
        super(FinalCNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 14 * 14, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = x.view(x.size(0), -1)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Initialize model, loss, and optimizer
model = FinalCNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training function with tqdm and accuracy calculation
def train_and_evaluate(model, train_loader, test_loader, epochs):
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        # Training loop with progress bar
        with tqdm(train_loader, unit="batch") as tepoch:
            tepoch.set_description(f"Epoch {epoch + 1}/{epochs}")
            for images, labels in tepoch:
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                # Update tqdm bar
                tepoch.set_postfix(loss=train_loss / total, accuracy=100.0 * correct / total)

        train_accuracy = 100.0 * correct / total
        print(f"Training Loss: {train_loss / len(train_loader):.4f}, Training Accuracy: {train_accuracy:.2f}%")

        # Evaluate on the test set
        model.eval()
        test_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in tqdm(test_loader, desc="Testing", unit="batch"):
                outputs = model(images)
                loss = criterion(outputs, labels)

                test_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        test_accuracy = 100.0 * correct / total
        print(f"Test Loss: {test_loss / len(test_loader):.4f}, Test Accuracy: {test_accuracy:.2f}%\n")

# Train and evaluate the model
train_and_evaluate(model, train_loader, test_loader, epochs=10)
