In [1]:
!pip install torch torchvision numpy Pillow matplotlib



In [2]:
import os
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

In [3]:
def load_images_and_preprocess(directory):
    """
    Helper function to load and preprocess handwritten images and their labels.
    Args:
        directory (str): Path to the directory containing image files.
    Returns:
        images (torch.Tensor): Tensor of image data (num_images, 1, 28, 28).
        labels (torch.Tensor): Tensor of labels corresponding to the images.
    """
    images = []
    labels = []

    transform = transforms.Compose([
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize(mean=0.5, std=0.5)
    ])

    for filename in os.listdir(directory):
        if filename.endswith(".png"):
            label = int(filename.split('-')[0])
            image_path = os.path.join(directory, filename)
            image = Image.open(image_path).convert('L')
            image_tensor = transform(image)
            images.append(image_tensor)
            labels.append(label)
    
    images = torch.stack(images)
    labels = torch.tensor(labels, dtype=torch.long)
    
    return images, labels

directory_path = "./digits"
images, labels = load_images_and_preprocess(directory_path)

In [4]:
class HandwrittenDigitsDataset(torch.utils.data.Dataset):
    def __init__(self, directory):
        self.directory = directory
        self.image_files = [f for f in os.listdir(directory) if f.endswith(".png")]
        self.transform = transforms.Compose([
            transforms.Resize((28, 28)),
            transforms.ToTensor(),
            transforms.Normalize(mean=0.5, std=0.5)
        ])
    
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        file_name = self.image_files[idx]
        label = int(file_name.split('-')[0])
        image_path = os.path.join(self.directory, file_name)
        image = Image.open(image_path).convert('L')
        image = self.transform(image)
        return image, label

# Create dataset and dataloader
dataset = HandwrittenDigitsDataset(directory_path)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

In [5]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        
    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [6]:
def load_mnist_data(batch_size=64):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
    test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

# Load MNIST dataset
train_loader, test_loader = load_mnist_data(batch_size=64)

In [7]:
def train(model, device, train_loader, optimizer, criterion, epochs=5):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if (batch_idx + 1) % 100 == 0:
                print(f"Epoch [{epoch+1}/{epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}")

# Initialize model, criterion, optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
train(model, device, train_loader, optimizer, criterion, epochs=5)

Epoch [1/5], Step [100/938], Loss: 0.2409
Epoch [1/5], Step [200/938], Loss: 0.2858
Epoch [1/5], Step [300/938], Loss: 0.3332
Epoch [1/5], Step [400/938], Loss: 0.2582
Epoch [1/5], Step [500/938], Loss: 0.3940
Epoch [1/5], Step [600/938], Loss: 0.1924
Epoch [1/5], Step [700/938], Loss: 0.1441
Epoch [1/5], Step [800/938], Loss: 0.1508
Epoch [1/5], Step [900/938], Loss: 0.1140
Epoch [2/5], Step [100/938], Loss: 0.1090
Epoch [2/5], Step [200/938], Loss: 0.0664
Epoch [2/5], Step [300/938], Loss: 0.1735
Epoch [2/5], Step [400/938], Loss: 0.0596
Epoch [2/5], Step [500/938], Loss: 0.2743
Epoch [2/5], Step [600/938], Loss: 0.1641
Epoch [2/5], Step [700/938], Loss: 0.1280
Epoch [2/5], Step [800/938], Loss: 0.1243
Epoch [2/5], Step [900/938], Loss: 0.1820
Epoch [3/5], Step [100/938], Loss: 0.0975
Epoch [3/5], Step [200/938], Loss: 0.1467
Epoch [3/5], Step [300/938], Loss: 0.0202
Epoch [3/5], Step [400/938], Loss: 0.0790
Epoch [3/5], Step [500/938], Loss: 0.1910
Epoch [3/5], Step [600/938], Loss:

In [8]:
def test(model, device, test_loader, criterion):
    model.eval()
    correct = 0
    total = 0
    test_loss = 0.0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    avg_test_loss = test_loss / len(test_loader)
    print(f"Test Accuracy: {accuracy:.2f}%")
    print(f"Avg Test Loss: {avg_test_loss:.4f}")

# Test the model
test(model, device, test_loader, criterion)

Test Accuracy: 97.22%
Avg Test Loss: 0.0907


In [9]:
# Fine-tune the model using your own handwritten digits
fine_tune_loader = DataLoader(dataset, batch_size=16, shuffle=True)
train(model, device, fine_tune_loader, optimizer, criterion, epochs=5)
test(model, device, fine_tune_loader, criterion)

Test Accuracy: 90.38%
Avg Test Loss: 0.2896
