In [3]:
# ## This cell contains the essential imports you will need – DO NOT CHANGE THE CONTENTS! ##
# # src: MNIST_Handwritten_Digits_STARTER.ipynb
import torch
from torchvision import datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn

from MNIST_solver import eval_net_on_data
from MNIST_solver import create_panel_of_consecutive_ex_images, get_max_n_normalized_mean_n_std
from MNIST_solver import get_train_and_test_data_w_batch_size, MNIST_MLP, eval_net_on_data
from MNIST_solver import PerformanceImprover, TrainingStopper, train_network_classification
from MNIST_solver import define_objective_fcn_with_params
from MNIST_solver import get_model_device , get_HW_acceleration_if_available


# Additional optimizer for tuning the hyper-parameters
# src: https://optuna.org
import optuna
import numpy as np
import torchvision.models as models

# Seed PyTorch's global random number generator so repeated runs start from the same state.
torch_seed = 11
torch.manual_seed(torch_seed)

<torch._C.Generator at 0x117d4f6d0>

In [4]:
# Folder that holds (or will receive) the MNIST files.
data_folder = r'./data'

# Untransformed copies of both splits; the training split feeds the statistics helper below.
train_raw = datasets.MNIST(
    root=data_folder,
    train=True,
    download=True,
    transform=None,
)
test_raw = datasets.MNIST(
    root=data_folder,
    train=False,
    download=True,
    transform=None,
)

# Statistics returned by the project helper; mean and std parameterise Normalize below.
max_data_value, img_mean, img_std = get_max_n_normalized_mean_n_std(train_raw)

# Convert each image to a tensor, then normalise its single channel with the values above.
transform_pipeline = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((img_mean,), (img_std,)),
    ]
)

# Transformed data (to be used by the network); the files already exist, so no download.
train_data = datasets.MNIST(
    root=data_folder,
    train=True,
    download=False,
    transform=transform_pipeline,
)
test_data = datasets.MNIST(
    root=data_folder,
    train=False,
    download=False,
    transform=transform_pipeline,
)

# The stored image array is three-dimensional; the last two axes are read as rows x cols.
_, img_rows, img_cols = train_data.data.numpy().shape
network_input_dim = img_rows * img_cols

In [5]:
# Hyper-parameters for this run.
# NOTE(review): the inline remark on BATCH_SIZE only holds if this constant is also used
# as a layer width somewhere (a batch size does not by itself constrain a network's
# architecture) - check every use of the constant before changing or relying on it.
BATCH_SIZE = 64 # cannot be changed unless the architecture of resnet is changed
# Learning rate handed to the Adam optimizer further down.
LEARNING_RATE = 0.00045

# Project helper that takes the batch size and both datasets; presumably it returns a
# (train, test) pair of data loaders - confirm in MNIST_solver.
train_data_loaded , test_data_loaded = get_train_and_test_data_w_batch_size(BATCH_SIZE , train_data, test_data)

In [None]:
# Create the network: load ResNet-18 with torchvision's default pre-trained weights.
resnet = models.resnet18(weights='ResNet18_Weights.DEFAULT')

# Legacy spelling of the same request (older torchvision API), kept for reference only:
# resnet = models.resnet18(pretrained=True)

In [None]:
# Adapt the input layer to single-channel MNIST images.
# The number of output channels must equal what the layers after conv1 expect, i.e. the
# width of the convolution being replaced. It is unrelated to the batch size, so it is
# read from the existing layer instead of BATCH_SIZE; otherwise any change to the batch
# size would break the network. Re-running this cell is safe: the replaced layer keeps
# the same width.
resnet.conv1 = nn.Conv2d(
    1,
    resnet.conv1.out_channels,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)

# Replace the classification head so it emits one score per digit.
num_classes = 10  # MNIST has 10 classes (digits 0-9)
resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)

In [None]:
# Optional (currently disabled): move the network to a hardware accelerator if one is available.
# device = get_HW_acceleration_if_available()
# resnet.to(device)

In [None]:
# Adam optimizer over every parameter of the adapted ResNet, using LEARNING_RATE as step size.
optimizer = optim.Adam(resnet.parameters(), lr= LEARNING_RATE)
# Unused alternative kept for reference:
# from MNIST_solver import train_net_on_data

In [None]:
import torch.nn.functional as F
# Loss function used for classification: the functional form of cross-entropy.
criterion=F.cross_entropy

In [None]:
# NOTE(review): this is the same helper call, with the same arguments, as the earlier cell
# that binds `train_data_loaded` / `test_data_loaded`; presumably one pair of loaders is
# enough - confirm which names are used downstream.
train_loader, test_loader = get_train_and_test_data_w_batch_size(BATCH_SIZE, train_data, test_data)

In [None]:
# Empty containers for the metrics collected on the training data ...
train_loss_history = []
training_acc_hist = []
# ... and on the held-out data.
val_loss_history = []
val_acc_hist = []

# Dataset sizes behind each loader (the test split plays the validation role here).
num_train_samples = len(train_loader.dataset)
num_val_samples = len(test_loader.dataset)

In [None]:
import copy

# Work on the adapted ResNet under the shorter name `net`.
net = resnet
# Best accuracy seen so far; starts at zero so any measured accuracy can replace it.
best_test_accuracy = 0.0
# state_dict() returns references to the live parameter tensors, so a plain assignment
# would silently follow every later optimizer step. Deep-copy to keep a real snapshot.
best_net_weights = copy.deepcopy(net.state_dict())

In [None]:
# Switch the network to training mode (nn.Module.train).
net.train()

In [None]:
def training_step(loss, optimizer):
    """Run one optimisation step for an already-computed loss.

    Args:
        loss: value on which ``backward()`` is called to populate the gradients.
        optimizer: optimizer whose ``zero_grad()`` and ``step()`` are invoked.

    Returns:
        The same ``loss`` object that was passed in.
    """
    optimizer.zero_grad()  # discard gradients left over from a previous step
    loss.backward()        # back-propagate to fill in fresh gradients
    optimizer.step()       # apply the parameter update
    return loss

# NOTE(review): this import and assignment repeat an earlier cell verbatim; one copy suffices.
import torch.nn.functional as F
criterion=F.cross_entropy

In [None]:
from MNIST_solver import process_all_data_from_loader_n_get_metrics

In [None]:
# Alias the adapted ResNet as `model` and ask the project helper for its device.
# Presumably the helper returns the device holding the model's parameters - confirm in MNIST_solver.
model = resnet
device = get_model_device(model)

In [None]:
# Accumulators initialised to zero; presumably summed over batches during a training pass.
running_loss = 0.0
train_correct = 0  # presumably the count of correctly classified training samples - confirm

In [None]:
# Collect every item the training loader yields into a list.
# NOTE(review): the next cell reads only element 0, so `next(iter(train_loader))` would
# avoid holding the whole training set in memory; the full list is kept here in case
# other cells index into it.
input_out = list(train_loader)

In [None]:
# Take the first collected batch and unpack its two components.
input_ , label_ = input_out[0]

In [None]:
# Move both tensors of the batch to `device` (looked up from the model in an earlier cell).
input_, label_ = input_.to(device), label_.to(device)

In [None]:
# loss = criterion(output, label_)

In [None]:
# NOTE(review): `output` is not assigned in any cell visible here, so this cell relies on
# leftover kernel state and fails with NameError after Restart & Run All. Presumably it was
# the result of a forward pass such as `model(input_)` - confirm and restore that cell.
output.shape

In [None]:
# NOTE(review): `preds` is not assigned in any cell visible here (hidden kernel state);
# presumably the predicted classes derived from the network output - confirm and restore
# the defining cell, or delete this one.
preds

In [1]:
# This code was adapted from suggestions made by ChatGPT (version 3.5) in response to a query about transfer learning.

import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class ResNetForMNIST(nn.Module):
    """Wrap a ResNet-style network so it can classify single-channel images.

    On construction the wrapped network's existing parameters are frozen, except
    those of its final fully connected layer (``fc``). Its first convolution
    (``conv1``) is then replaced by a newly created layer that accepts one input
    channel. Because the new convolution is created after freezing, it is
    trainable as well.

    Args:
        resnet: module exposing ``conv1`` (a convolution) and ``fc`` attributes.
            The caller is expected to have already set ``fc`` to a layer with
            the desired number of output classes.
    """

    def __init__(self, resnet):
        super().__init__()

        self.resnet = resnet

        # Freeze the existing weights first; layers created afterwards stay trainable.
        self.freeze_all_layers_but_lastone()

        # New single-channel stem. Its width is copied from the layer being replaced so
        # that it always matches what the rest of the network expects.
        self.resnet.conv1 = nn.Conv2d(
            1,
            self.resnet.conv1.out_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
        )

        self.softmax = nn.Softmax(dim=1)  # Apply softmax along the dimension of classes

    def freeze_all_layers(self):
        """Disable gradient computation for every parameter of the wrapped network."""
        for param in self.resnet.parameters():
            param.requires_grad = False

    def freeze_all_layers_but_lastone(self):
        """Freeze the wrapped network, then re-enable training of its ``fc`` layer.

        ``requires_grad`` has to be set on the layer's parameters; setting it on
        the layer object itself has no effect on training.
        """
        self.freeze_all_layers()
        for param in self.resnet.fc.parameters():
            param.requires_grad = True

    def forward(self, x):
        """Run the wrapped network on ``x``.

        Returns:
            A tuple ``(logits, probabilities, predicted_class)``: the raw network
            output, its softmax along dim 1, and the index of the largest
            probability along dim 1.
        """
        logits = self.resnet(x)
        probabilities = self.softmax(logits)

        # Tensor.max(dim=...) returns (values, indices); only the indices are needed.
        # Using the tensor method avoids depending on a module-level `torch` name.
        _, predicted_class = probabilities.max(dim=1)
        return logits, probabilities, predicted_class

# Load a fresh pre-trained ResNet-18 (this rebinds the `resnet` name used by earlier cells).
resnet = models.resnet18(weights='ResNet18_Weights.DEFAULT')

# Layer freezing and the single-channel input convolution are requested inside
# ResNetForMNIST.__init__, so nothing is frozen at this point.
# ...

# Replace the final classification layer so it outputs one raw score (logit) per digit.
num_classes = 10
resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)

# Wrap the network (this rebinds `model`, which an earlier cell pointed at the bare ResNet).
model = ResNetForMNIST(resnet)

In [2]:
import torch  # local import: the cell defining `model` does not bring `torch` into scope

# Prefer Apple's Metal backend as before, but fall back to CUDA or the CPU so the notebook
# also runs on machines without MPS (a hard-coded 'mps' raises an error there).
if torch.backends.mps.is_available():
    target_device = torch.device("mps")
elif torch.cuda.is_available():
    target_device = torch.device("cuda")
else:
    target_device = torch.device("cpu")

model.to(target_device)

ResNetForMNIST(
  (resnet): ResNet(
    (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tra

In [None]:
# `input_` was placed on the device of the model used in an earlier cell, whereas `model`
# now refers to a different network that has been moved since. Send the batch to wherever
# this model's weights live to avoid a device-mismatch error.
model_device = next(model.parameters()).device

# The forward pass returns three values; only the last (predicted classes) is kept as OUT.
# The first two get ordinary names rather than `_` / `__`, which IPython uses for its
# output history.
batch_logits, batch_probabilities, OUT = model(input_.to(model_device))

In [None]:
# NOTE(review): scratch cell - creates an empty set that no model code visible here uses;
# presumably a leftover experiment that can be deleted.
a = set()

In [None]:
# Scratch: insert the integer 3 into the set `a`.
a.add(3)

In [None]:
# Scratch: display the current contents of the set `a`.
a