# Load modules

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# Define parameters

In [None]:
# Input/output geometry of the task.
in_shape = (1, 28, 28)   # (channels, height, width) in PyTorch
number_class = 10        # width of the model's final linear layer

# Optimisation settings.
batch_size, epochs = 256, 10   # batch size for both loaders / passes over the training set
learning_rate = 5e-6           # optimizer step size; 0.001 is kept as a noted alternative

# Data loading & preprocessing

In [None]:
# ToTensor converts the uint8 PIL image (0-255) into a float tensor that is
# already scaled to [0.0, 1.0]. Dividing by 255 a second time would squash the
# inputs into [0, ~0.0039], so no further rescaling is applied here.
transform = transforms.Compose([
    transforms.ToTensor(),               # PIL Image -> float tensor in [0, 1]
])

# MNIST is downloaded into `root` on first use and read from disk afterwards.
train_dataset = datasets.MNIST(root='../datasets/handwrite', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='../datasets/handwrite', train=False, download=True, transform=transform)

# Shuffle only the training batches; keep the test order fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model definition

In [None]:
class CNNModel(nn.Module):
    """Two-stage convolutional classifier for single-channel 28x28 images.

    Each stage is a 3x3 convolution, a ReLU and a 2x2 max pool. The pooled
    feature map is flattened and passed through a dropout-regularised hidden
    layer of 128 units before the final linear layer.

    Args:
        number_class: Number of output logits produced by the last layer.
    """

    def __init__(self, number_class):
        super().__init__()

        # Convolutional feature extractor (input has 1 channel for MNIST).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        # Classifier head. Spatial size goes 28 -> 26 -> 13 -> 11 -> 5, so the
        # flattened feature vector has 64 * 5 * 5 entries.
        self.flatten = nn.Flatten()
        self.dropout1 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=64 * 5 * 5, out_features=128)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=128, out_features=number_class)

    def forward(self, x):
        """Return raw class logits for a batch of images.

        No softmax is applied; the notebook pairs this model with
        ``nn.CrossEntropyLoss``, which works on logits.
        """
        features = self.pool1(torch.relu(self.conv1(x)))
        features = self.pool2(torch.relu(self.conv2(features)))

        hidden = self.dropout1(self.flatten(features))
        hidden = self.dropout2(torch.relu(self.fc1(hidden)))

        return self.fc2(hidden)

# Instantiate model, loss, optimizer

In [None]:
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the network and move its parameters to the chosen device.
model = CNNModel(number_class)
model = model.to(device)

# Cross-entropy on raw logits, optimised with Adam at the configured step size.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Find the best learning rate

In [None]:
def find_lr(model, train_loader, loss_fn, optimizer_class=optim.Adam,
            init_value=1e-7, final_value=10, beta=0.98):
    """Run a learning-rate range test and plot smoothed loss against learning rate.

    The learning rate grows geometrically from ``init_value`` on the first
    batch to ``final_value`` on the last batch of a single pass over
    ``train_loader``. The pass stops early once the smoothed loss is no longer
    finite, or (after the first two batches) exceeds four times the best
    smoothed loss seen so far.

    The sweep takes real optimizer steps, so the model's weights and its
    train/eval mode are snapshotted up front and restored before returning.
    The model handed in is left exactly as the caller passed it.

    Args:
        model: ``nn.Module`` to probe; must own at least one parameter.
        train_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar
            loss tensor.
        optimizer_class: Optimizer constructor, called as
            ``optimizer_class(model.parameters(), lr=...)``.
        init_value: Learning rate applied to the first batch.
        final_value: Learning rate reached on the last batch.
        beta: Smoothing factor of the exponential moving average of the loss.

    Returns:
        None. The curve is drawn with matplotlib.
    """
    import copy
    import math

    # A one-batch loader would otherwise give an exponent of 1/0.
    num_steps = max(len(train_loader) - 1, 1)
    mult = (final_value / init_value) ** (1 / num_steps)
    lr = init_value
    optimizer = optimizer_class(model.parameters(), lr=lr)

    # Resolve the device once instead of on every batch.
    device = next(model.parameters()).device

    # Snapshot everything the sweep is about to disturb.
    initial_state = copy.deepcopy(model.state_dict())
    was_training = model.training

    avg_loss, best_loss = 0., float('inf')
    losses, lrs = [], []

    model.train()
    try:
        for batch_num, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(device), targets.to(device)

            # Apply the learning rate for this batch (init_value on batch 0).
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

            # Bias-corrected exponential moving average of the loss.
            avg_loss = beta * avg_loss + (1 - beta) * loss.item()
            smoothed_loss = avg_loss / (1 - beta ** (batch_num + 1))

            # Record the lr and loss
            lrs.append(lr)
            losses.append(smoothed_loss)

            # Stop if the loss explodes. NaN never compares greater than
            # anything, so non-finite values are checked explicitly.
            if not math.isfinite(smoothed_loss):
                break
            if batch_num > 1 and smoothed_loss > 4 * best_loss:
                break

            # Update best loss
            if smoothed_loss < best_loss or batch_num == 0:
                best_loss = smoothed_loss

            # Backprop
            loss.backward()
            optimizer.step()

            # Grow the learning rate only after this batch has used it.
            lr *= mult
    finally:
        # Undo the sweep so later training starts from the original weights.
        model.load_state_dict(initial_state)
        model.train(was_training)

    # Plot
    plt.plot(lrs, losses)
    plt.xscale('log')
    plt.xlabel("Learning Rate")
    plt.ylabel("Loss")
    plt.title("LR Finder")
    plt.show()

# --- Example usage ---
# Expects from the cells above: `train_loader`, `criterion`, `CNNModel`,
# `number_class` and `device`.
#
# The range test takes real optimizer steps at learning rates climbing towards
# 10, so it is run on a freshly initialised throwaway network. The `model`
# trained in the next section is never passed in and keeps its initial weights.
find_lr(CNNModel(number_class).to(device), train_loader, criterion)

# Train model

In [None]:
for epoch in range(epochs):
    # Training mode keeps the dropout layers active.
    model.train()
    batch_losses = []

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Clear gradients left from the previous step, then do one update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    # Report the mean of the per-batch losses for this epoch.
    running_loss = sum(batch_losses, 0.0)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}")

# Evaluation

In [None]:
# Evaluation mode disables dropout; no_grad skips gradient bookkeeping.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # The predicted class is the index of the largest logit in each row.
        predicted = model(inputs).argmax(dim=1)

        total += targets.shape[0]
        correct += int((predicted == targets).sum())

print(f"Test Accuracy: {100 * correct / total:.2f}%")

# Prediction example

In [None]:
import matplotlib.pyplot as plt

# Take the first test sample and show it next to its ground-truth label.
sample_image, sample_label = test_dataset[0]

plt.imshow(sample_image.squeeze(), cmap='gray')
plt.title(f"True Label: {sample_label}")
plt.axis('off')
plt.show()

# Run the same sample through the network in inference mode.
model.eval()
with torch.no_grad():
    sample_image = sample_image[None].to(device)  # prepend the batch dimension
    output = model(sample_image)
    pred_label = torch.argmax(output, dim=1).item()

print(f"Predicted Label: {pred_label}")