In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim
from PIL import Image
import io

In [2]:
# Preprocessing pipeline applied to every image fed to the network.
transform = transforms.Compose([
    # Upscale MNIST's 28x28 digits to the 32x32 input size the network is built for
    transforms.Resize(size=(32, 32)),
    # PIL image -> float tensor with values in [0, 1]
    transforms.ToTensor(),
    # (x - 0.5) / 0.5: rescale pixel values from [0, 1] to [-1, 1]
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

In [3]:
# Training split of MNIST (downloaded on first use), preprocessed with `transform`
trainset = datasets.MNIST(
    root='pytorch/MNIST_data/',
    train=True,
    download=True,
    transform=transform,
)
# Reshuffled every epoch, served in mini-batches of 64
trainloader = DataLoader(dataset=trainset, batch_size=64, shuffle=True)

In [4]:
# Held-out test split of MNIST (downloaded on first use), preprocessed with `transform`
testset = datasets.MNIST(
    root='pytorch/MNIST_data/',
    train=False,
    download=True,
    transform=transform,
)
# Fixed order, mini-batches of 64
testloader = DataLoader(dataset=testset, batch_size=64, shuffle=False)

In [5]:
class LeNet5(nn.Module):
    """LeNet-5-style CNN mapping 1-channel 32x32 images to 10 class scores.

    Spatial size through the feature extractor for a 32x32 input:
    conv1 (5x5, padding 2) keeps 32x32 -> max-pool 2 -> 16x16 ->
    conv2 (5x5, no padding) -> 12x12 -> max-pool 2 -> 6x6.
    With 16 output channels that gives 16*6*6 = 576 features per image,
    which is the input width of the first fully connected layer.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (1 input channel: grayscale)
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1)

        # Fully connected classifier head: 576 -> 120 -> 84 -> 10
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        # 10 output classes (digits 0-9)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Compute raw class scores (logits) for a batch of images.

        Args:
            x: Tensor of shape (batch, 1, 32, 32). A single unbatched
                (1, 32, 32) image is also accepted and treated as a batch of one.

        Returns:
            Tensor of shape (batch, 10) with unnormalized class scores.

        Raises:
            RuntimeError: If the input's spatial size does not reduce to 6x6,
                so the flattened features do not match ``fc1``.
        """
        # Treat a lone (C, H, W) image as a batch of one.
        if x.dim() == 3:
            x = x.unsqueeze(0)

        # Each stage: convolution -> 2x2 max-pool -> ReLU
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))

        # Flatten everything except the batch dimension. Unlike view(-1, 576),
        # this never folds a feature-count mismatch into the batch dimension:
        # a wrongly sized input fails loudly at fc1 instead of silently
        # returning scores for a different number of "images".
        x = torch.flatten(x, start_dim=1)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No softmax here: nn.CrossEntropyLoss expects raw logits.
        return self.fc3(x)

In [6]:
# Fresh network, classification loss, and SGD-with-momentum optimizer
model = LeNet5()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Number of full passes over the training set
epochs = 20

for epoch in range(epochs):
    # Sum of per-batch losses, averaged once the epoch is done
    running_loss = 0.0
    for images, labels in trainloader:
        # Forward pass and loss for this mini-batch
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Clear gradients left over from the previous step, then backpropagate
        optimizer.zero_grad()
        loss.backward()

        # Apply the parameter update
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss for the epoch
    print(f"Epoch {epoch+1:2d}, Loss: {running_loss/len(trainloader):.12f}")

print('Finished Training')

Epoch  1, Loss: 2.138032580553
Epoch  2, Loss: 0.359664186843
Epoch  3, Loss: 0.170584919484
Epoch  4, Loss: 0.123416796011
Epoch  5, Loss: 0.099806052965
Epoch  6, Loss: 0.086629401974
Epoch  7, Loss: 0.076796694015
Epoch  8, Loss: 0.068962281645
Epoch  9, Loss: 0.061739090699
Epoch 10, Loss: 0.056378339411
Epoch 11, Loss: 0.052204118593
Epoch 12, Loss: 0.048964588991
Epoch 13, Loss: 0.044467732421
Epoch 14, Loss: 0.042177830430
Epoch 15, Loss: 0.039575640976
Epoch 16, Loss: 0.037469258580
Epoch 17, Loss: 0.034605332395
Epoch 18, Loss: 0.032409316632
Epoch 19, Loss: 0.031168907345
Epoch 20, Loss: 0.029228544062
Finished Training


In [7]:
# Read the sample PNG fully into memory so the file handle is closed right away
image_path = 'kaggle/let_net_5_sample_3.png'
with open(image_path, 'rb') as image_file:
    image_bytes = image_file.read()

# Decode the bytes with PIL and reduce to a single grayscale channel ('L'),
# matching the 1-channel input the network was trained on
image = Image.open(io.BytesIO(image_bytes)).convert('L')

In [8]:
# Digit the sample image is expected to show (the file is named "..._sample_3.png")
expected_class = 3

# Apply the same preprocessing as the training data, then add a batch
# dimension (C x H x W -> B x C x H x W). ToTensor already yields float32.
# NOTE(review): the saved output of this cell shows class 8 for this sample.
# MNIST digits are light strokes on a dark background; if this PNG is
# dark-on-light it would need inverting before `transform` -- confirm
# against the actual image.
image_tensor = transform(image).unsqueeze(0)

# Set the model to evaluation mode
model.eval()

# Inference only: disable gradient tracking for the forward pass
with torch.no_grad():
    output = model(image_tensor)

# Turn the logits into class probabilities and pick the most likely class
predicted_probabilities = torch.softmax(output, dim=1)
predicted_class = torch.argmax(predicted_probabilities, dim=1)

# Report the prediction next to the expected digit and whether they agree
predicted_label = predicted_class.item()
print(
    f"Predicted class: {predicted_label}, expected: {expected_class}, "
    f"correct: {predicted_label == expected_class}"
)
print(f"Predicted probabilities: {predicted_probabilities.numpy()}")

Predicted class: 8, expected: False
Predicted probabilities: [[1.2930253e-01 1.0739955e-02 3.7719668e-03 8.3693722e-03 1.2009757e-01
  8.9129739e-05 7.2757476e-03 6.1630011e-03 7.0629650e-01 7.8942021e-03]]
