In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

`torchvision`: pytorch extension that provides datasets, model architectures, and image transformation utils
`torchvision.transform`: contains image transformation functions

In [2]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

`transforms.ToTensor()`: converts an image (which is originally represented as a PIL Image or a numpy array) into a PyTorch tensor. Specifically, it converts the image's pixel values to a tensor and scales them to the range [0, 1]. So, each pixel's value is divided by 255 to bring it into the [0, 1] range.

`transforms.Normalize((0.5), (0.5))`: normalize the picture values
- Subtract 0.5: This step shifts the values from [0, 1] to [-0.5, 0.5]. It centers the data around zero.
- Divide by 0.5: This step scales the values from [-0.5, 0.5] to [-1, 1]. The purpose of this step is to ensure that the input data to the neural network has a mean of 0 and a standard deviation of 1, which can help with training stability. 

___This will result in a tensor with values in the range of [-1, 1], which is a common preprocessing step for training deep neural networks. (to help network learn effectively and make the training process converge faster)___


In [3]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

`trainloader`, responsible batching, shuffling, and providing data to the training loop

In [34]:
# define the CNN architecture
# let's now build a simple model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.fc1 = nn.Linear(24*24*64, 128)
        self.fc2 = nn.Linear(128, 10)
        
    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.log_softmax(x, dim=1)

In [35]:
# instantiate the model
net = Net()
# define loss func
criterion = nn.CrossEntropyLoss()
# define optimizer 
optimizer = optim.SGD(net.parameters(), lr=.01)

In [36]:
# train
# loop over the dataset 5 times
for epoch in range(5):
    running_loss = .0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 200 == 199:    # print every 200 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0
            
print('Finished Training')

[1,   200] loss: 1.142
[1,   400] loss: 0.413
[1,   600] loss: 0.352
[1,   800] loss: 0.297
[2,   200] loss: 0.246
[2,   400] loss: 0.210
[2,   600] loss: 0.213
[2,   800] loss: 0.186
Finished Training


In [4]:
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [41]:
net.eval()

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=36864, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [42]:
correct = 0
total = 0

with torch.no_grad():  # disable gradient tracking during evaluation for efficiency
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on the test set: {accuracy:.2f}%')

Accuracy on the test set: 94.67%


# Fine-tuning

In [55]:
# define the CNN architecture
# let's now build a simple model
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 64, 3, 1)
        self.bn3 = nn.BatchNorm2d(64)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(30976, 360)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(360, 128)
        self.fc3 = nn.Linear(128, 10)
        
    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return torch.log_softmax(x, dim=1)

In [56]:
from torch.optim.lr_scheduler import StepLR

model = Model()

optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=1, gamma=.7) # decay LR by a factor of 0.7 every epoch
criterion = nn.CrossEntropyLoss()

In [57]:
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 200 == 199:    # print every 200 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

[1,   200] loss: 0.324
[1,   400] loss: 0.099
[1,   600] loss: 0.075
[1,   800] loss: 0.061
[2,   200] loss: 0.044
[2,   400] loss: 0.040
[2,   600] loss: 0.038
[2,   800] loss: 0.042


In [58]:
model.eval()

Model(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=30976, out_features=360, bias=True)
  (fc2): Linear(in_features=360, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
)

In [59]:
correct = 0
total = 0

with torch.no_grad():  # disable gradient tracking during evaluation for efficiency
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on the test set: {accuracy:.2f}%')

Accuracy on the test set: 98.82%


`input_batch = input_tensor.unsqueeze(0)`: add batch dimension to the input tensor; this is necessary because deep learning models typically expect input data to be organized in batches, even if we are processing a single image

In [67]:
from PIL import Image

image_path = './digit6.png'

def load_and_preprocess_img(image_path):
    image = Image.open(image_path)
    # convert it to grayscale
    image = image.convert('L')
    
    preprocess = transforms.Compose([
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    
    # preprocess the image
    input_tensor = preprocess(image)
    print(input_tensor.shape)
    input_batch = input_tensor.unsqueeze(0)  # add a batch dimension
    return input_batch

def classify_number(image_path):
    input_batch = load_and_preprocess_img(image_path)
    
    model.eval()
    
    with torch.no_grad():
        output = model(input_batch)
        
    # Get the predicted class
    _, predicted_idx = torch.max(output, 1)
    predicted_class = predicted_idx.item()
    
    return predicted_class


# Classify the number in the image
predicted_class = classify_number(image_path)
print(f'The predicted class is: {predicted_class}')

torch.Size([1, 28, 28])
The predicted class is: 6


In [None]:
class EarlyStopping:
    def __init__(self, patience=5):
        self.patience = patience
        self.counter = 0
        self.best_loss = None

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss:
            self.counter += 1
            if self.counter >= self.patience:
                return True
        else:
            self.best_loss = val_loss
            self.counter = 0
        return False
