In [1]:
import cv2
import timm
import torchvision
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
from torchvision.models import resnet18
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim
import copy

MNIST_PATH = './datasets/mnist'


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#Loading pretrained model
net = timm.create_model("resnet18", pretrained=False, num_classes=10)
# The checkpoint expects a 1-channel (grayscale) stem, so replace conv1 before loading the weights
net.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
net.load_state_dict(
  torch.hub.load_state_dict_from_url(
    "https://huggingface.co/gpcarl123/resnet18_mnist/resolve/main/resnet18_mnist.pth",
    # net has not been moved to a device yet, so load the checkpoint on the CPU too.
    # The resulting weights are identical and this no longer requires a CUDA device.
    map_location="cpu",
    file_name="resnet18_mnist.pth",
  )
)
# Normalisation is kept separate from the dataset transform and applied at evaluation time.
preprocessor = torchvision.transforms.Normalize((0.1307,), (0.3081,)) #These values were taken from pre-trained models usage instructions
transform = transforms.Compose([transforms.ToTensor()])

#Loading MNIST dataset to test against
test_set_mnist = datasets.MNIST(root=MNIST_PATH, train=False, download=True, transform=transform)
test_loader_mnist = DataLoader(test_set_mnist, batch_size=32, shuffle=False)

print(net)


ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act1): ReLU(inplace=True)
      (aa): Identity()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act2): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, m

In [3]:
#Evaluating pre-trained model accuracy against MNIST test set
net.eval()
# Use the GPU when one is present, otherwise fall back to the CPU instead of
# failing on a hard-coded "cuda" device. Later cells reuse `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = net.to(device)

correct = 0
total = 0

with torch.no_grad():
    for data, target in test_loader_mnist:
        data, target = data.to(device), target.to(device)
        # The MNIST dataset only applies ToTensor, so normalise here before the forward pass
        data = preprocessor(data)
        outputs = net(data)
        predicted = outputs.argmax(dim=1)  # class index with the highest logit
        total += target.size(0)
        correct += (predicted == target).sum().item()
accuracy = 100 * correct / total
print(f"Pre-Trained Model : {accuracy:.2f}%")

Pre-Trained Model : 99.36%


In [4]:
def _read_grayscale(path):
    """Read the image at ``path`` as single-channel grayscale.

    Raises:
        FileNotFoundError: if OpenCV could not load the file.
    """
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread does not raise on a missing/unreadable file, it returns None;
        # fail here with the offending path instead of later inside the transform.
        raise FileNotFoundError(f"Could not read image: {path}")
    return image


def _to_tensors(image_set, image_transform):
    """Convert a ``{"<digit>_<index>": image}`` dict into stacked tensors.

    Args:
        image_set: mapping whose keys start with the class digit followed by "_".
        image_transform: callable applied to every image before stacking.

    Returns:
        (images, labels): the stacked transformed images and a tensor of the
        integer labels parsed from the keys, in the dict's iteration order.
    """
    images = torch.stack([image_transform(image) for image in image_set.values()])
    labels = torch.tensor([int(key.split("_")[0]) for key in image_set])
    return images, labels


# Keys are "<digit>_<sample index>"; the digit prefix is the class label.
training_set = {}
validation_set = {}
test_set = {}

for i in range(10):
    for j in range(1, 9):
        training_set[f"{i}_{j}"] = _read_grayscale(f"../Training Set/{i}train{j}.jpg")
    for j in range(1, 3):
        validation_set[f"{i}_{j}"] = _read_grayscale(f"../Validation Set/{i}validation{j}.jpg")
    for j in range(1, 3):
        test_set[f"{i}_{j}"] = _read_grayscale(f"../Test Set/{i}test{j}.jpg")


# 10 digits x (8 train, 2 validation, 2 test) images
assert len(training_set) == 80
assert len(validation_set) == 20
assert len(test_set) == 20

# NOTE(review): the pre-trained model scores 10% on these test images (see the
# final cell's output); this may point to an input mismatch with MNIST
# (e.g. size or foreground/background polarity) -- TODO confirm.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)) #normalizing values the same way as pre-trained model
])

train_images, train_labels = _to_tensors(training_set, transform)
validation_images, validation_labels = _to_tensors(validation_set, transform)
test_images, test_labels = _to_tensors(test_set, transform)

train_dataset = TensorDataset(train_images, train_labels)
val_dataset = TensorDataset(validation_images, validation_labels)
test_dataset = TensorDataset(test_images, test_labels)

train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [None]:
num_epochs = 50
# Snapshot of the weights currently in `net`; every fine-tuning run restarts from it.
initial_state = copy.deepcopy(net.state_dict())

def fine_tuning(last_layer_only, learning_rate, gamma_rate):
    """Fine-tune the shared ``net`` and keep the weights with the best validation accuracy.

    The model is reset to ``initial_state``, all parameters are frozen, and only
    the selected parts are trained with Adam for ``num_epochs`` epochs on
    ``train_loader``. After each epoch the model is scored on ``val_loader``.

    Args:
        last_layer_only: if True only ``net.fc`` is trained; otherwise ``net.fc``,
            ``net.layer4`` and ``net.layer3`` are trained.
        learning_rate: initial Adam learning rate.
        gamma_rate: factor the learning rate is multiplied by every 20 epochs (StepLR).

    Returns:
        (best_acc, net): the best validation accuracy seen (fraction of correct
        predictions, as a 0-dim tensor) and the shared ``net`` with the weights
        of the first epoch that reached it loaded.
    """
    net.load_state_dict(initial_state)

    criterion = nn.CrossEntropyLoss()

    # Freeze everything, then unfreeze only the modules that should be tuned.
    if last_layer_only:
        trainable_modules = [net.fc]
    else: #This toggles more layers to be tuned as well
        trainable_modules = [net.fc, net.layer4, net.layer3]
    for param in net.parameters():
        param.requires_grad = False
    trainable_params = [param for module in trainable_modules for param in module.parameters()]
    for param in trainable_params:
        param.requires_grad = True

    optimizer = optim.Adam(trainable_params, lr=learning_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=gamma_rate)

    best_model_state = None
    best_acc = 0.0

    for epoch in range(num_epochs):
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                # NOTE: train() also puts the frozen BatchNorm layers in training
                # mode, so their running statistics keep being updated.
                net.train()
                data_loader = train_loader
            else:
                net.eval()
                data_loader = val_loader

            running_corrects = 0
            for inputs, labels in data_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                with torch.set_grad_enabled(is_training):
                    outputs = net(inputs)
                    _, preds = torch.max(outputs, 1)
                    if is_training:
                        optimizer.zero_grad()
                        loss = criterion(outputs, labels)
                        loss.backward()
                        optimizer.step()
                running_corrects += torch.sum(preds == labels)

            epoch_acc = running_corrects.double() / len(data_loader.dataset)
            if is_training:
                scheduler.step()
            # Always record the first validation result so a state exists to restore
            # even when accuracy never rises above 0 (previously load_state_dict(None)).
            elif best_model_state is None or epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_state = copy.deepcopy(net.state_dict())

    net.load_state_dict(best_model_state)
    return best_acc, net


In [None]:
# Grid search: every (learning rate, gamma) pair is trained in both fine-tuning
# modes, and for each mode the weights with the highest validation accuracy are
# kept so they can be evaluated afterwards.
learning_rates = [0.01, 0.05, 0.8, 0.1, 0.2, 0.5]
gamma_rates = [0.01, 0.05, 0.1, 0.5]

best_last_layer_only, best_last_layer_only_acc = None, 0
best_last_two_layers, best_last_two_layers_acc = None, 0

# Same visiting order as nesting the gamma loop inside the learning-rate loop
search_grid = [(lr, gamma) for lr in learning_rates for gamma in gamma_rates]

for learning_rate, gamma_rate in search_grid:
    # mode 1: only the final layer is trained
    acc, net = fine_tuning(True, learning_rate, gamma_rate)
    if acc > best_last_layer_only_acc:
        print(f"New best for last layer only, lr = {learning_rate} and gamma = {gamma_rate}")
        best_last_layer_only_acc, best_last_layer_only = acc, copy.deepcopy(net.state_dict())

    # mode 2: additional layers are trained as well
    acc, net = fine_tuning(False, learning_rate, gamma_rate)
    if acc > best_last_two_layers_acc:
        print(f"New best for multiple layers, lr = {learning_rate} and gamma = {gamma_rate}")
        best_last_two_layers_acc, best_last_two_layers = acc, copy.deepcopy(net.state_dict())

def test(model):
    """Score ``model`` on the train, validation and test loaders.

    Args:
        model: network to evaluate; it is switched to eval mode.

    Returns:
        List of three accuracies in percent, ordered
        [train_loader, val_loader, test_loader].
    """
    model.eval()
    accuracies = []
    with torch.no_grad():
        for loader in (train_loader, val_loader, test_loader):
            n_correct, n_seen = 0, 0
            for batch, targets in loader:
                batch = batch.to(device)
                targets = targets.to(device)
                # index of the largest logit per sample
                guesses = torch.max(model(batch), 1).indices
                n_correct += (guesses == targets).sum().item()
                n_seen += targets.size(0)
            accuracies.append(100 * n_correct / n_seen)
    return accuracies

# Evaluate the best checkpoint of each fine-tuning mode (chosen by validation
# accuracy during the parameter search) and, for reference, the original weights.
def _accuracies_for(state_dict):
    """Load ``state_dict`` into the shared ``net``, move it to ``device`` and return ``test(net)``."""
    net.load_state_dict(state_dict)
    net.to(device)  # Module.to works in place, so the global `net` stays the same object
    return test(net)

(train_acc_last_layer_only,
 val_acc_last_layer_only,
 test_acc_last_layer_only) = _accuracies_for(best_last_layer_only)

(train_acc_last_two_layers,
 val_acc_last_two_layers,
 test_acc_last_two_layers) = _accuracies_for(best_last_two_layers)

# Only the test-set figure is reported for the untouched pre-trained weights
_, _, test_acc_pre_trained = _accuracies_for(initial_state)

print(f"Pre-Trained on my 20 images : {test_acc_pre_trained:.2f}%")
print(f"Last Layer Only : train - {train_acc_last_layer_only:.2f}%, validation - {val_acc_last_layer_only:.2f}%, test - {test_acc_last_layer_only:.2f}%")
print(f"Last Two Layers : train - {train_acc_last_two_layers:.2f}%, validation - {val_acc_last_two_layers:.2f}%, test - {test_acc_last_two_layers:.2f}%")



Pre-Trained on my 20 images : 10.00%
Last Layer Only : train - 100.00%, validation - 60.00%, test - 55.00%
Last Two Layers : train - 100.00%, validation - 85.00%, test - 60.00%
