In [1]:
import torch
import torchvision
from torchvision import datasets, transforms
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from torch.optim.lr_scheduler import ReduceLROnPlateau
from PIL import Image, ImageOps
from os import listdir
from os.path import isfile, join
import random

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [11]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_transform = transforms.Compose([
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [3]:
trainset = datasets.MNIST(root='./data', train=True, download=True, transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=4)

testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=False, num_workers=4)

In [4]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.25)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
model = ConvNet().to(device)

        print("output before flatten:", x.shape)

In [5]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, factor=5, verbose=True)

In [6]:
epochs = 10
for epoch in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    scheduler.step()

    print(f"Epoch {epoch+1} - Training loss: {running_loss/len(trainloader)}")

Epoch 1 - Training loss: 0.13082322124779847
Epoch 2 - Training loss: 0.04275359336606537
Epoch 3 - Training loss: 0.029038904491299762
Epoch 4 - Training loss: 0.020668276721595612
Epoch 5 - Training loss: 0.015687407163441337
Epoch 6 - Training loss: 0.013357100754774365
Epoch 7 - Training loss: 0.01020758441968619
Epoch 8 - Training loss: 0.009308512491018519
Epoch 9 - Training loss: 0.007211169597870708
Epoch 10 - Training loss: 0.007107333087329541


In [7]:
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the 10,000 test images: {100 * correct/total}%")

Accuracy of the network on the 10,000 test images: 99.515%


In [8]:
def preprocess_image(image_path):
    image = Image.open(image_path).convert('L')
    image = image.resize((28, 28), Image.Resampling.BICUBIC)
    image = ImageOps.invert(image)
    image = transform(image).to(device)
    return image

custom_images_path = './custom_digits/'
custom_images = [f for f in sorted(listdir(custom_images_path)) if isfile(join(custom_images_path, f))]

custom_data = torch.stack([preprocess_image(join(custom_images_path, img)) for img in custom_images])
custom_labels = torch.tensor([int(img.split('_')[0]) for img in custom_images], dtype=torch.long)

custom_dataset = TensorDataset(custom_data, custom_labels)
custom_loader = DataLoader(custom_dataset, batch_size=1, shuffle=False)

In [9]:
model.eval()

correct1 = 0
total1 = 0
with torch.no_grad():
    for images, labels in custom_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total1 += labels.size(0)
        correct1 += (predicted == labels).sum().item()

print(f"Accuracy of the model on custom handwritten digits: {100 * correct1 / total1}%")


Accuracy of the model on custom handwritten digits: 75.0%


In [10]:
model.eval()

random_image_path = random.choice(custom_images)
random_image = preprocess_image(join(custom_images_path, random_image_path)).unsqueeze(0)

with torch.no_grad():
    random_image = random_image.to(device)
    outputs = model(random_image)
    _, predicted = torch.max(outputs, 1)

actual_digit = int(random_image_path.split('_')[0])

print(f"File name: {random_image_path}")
print(f"Predicted digit: {predicted.item()}")
print(f"Actual digit: {actual_digit}")

File name: 3_1.jpg
Predicted digit: 3
Actual digit: 3
