In [16]:
import torch
from torchvision import datasets
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn

from MNIST_solver import eval_net_on_data
from  MNIST_solver import create_panel_of_consecutive_ex_images, get_max_n_normalized_mean_n_std
from MNIST_solver import get_train_and_test_data_w_batch_size, MNIST_MLP, eval_net_on_data
from MNIST_solver import PerformanceImprover, TrainingStopper, train_network_classification
from MNIST_solver import define_objective_fcn_with_params, get_HW_acceleration_if_available

from MNIST_solver import get_model_device, train_net_on_data
import numpy as np
import torchvision.models as models

device = get_HW_acceleration_if_available()
print(f"Device used: {device}")

Device used: mps


In [7]:
torch_seed = 11
torch.manual_seed(torch_seed)
data_folder = r'./data'
train_raw = datasets.MNIST(root=data_folder, train=True, download = True, transform=None)
test_raw =  datasets.MNIST(root=data_folder, train=False,download = True, transform=None)

In [8]:
max_data_value, img_mean, img_std = get_max_n_normalized_mean_n_std(train_raw)
print(f'Max data value: {max_data_value:3.0f}')


print(f'mean: {img_mean.numpy():.4f}; std: {img_std:.4f}') # expected 0.1307 and 0.3081, respectively, 
# according to [1]
# Define transformation pipeline
transform_pipeline = transforms.Compose([transforms.ToTensor(), 
                                transforms.Normalize((img_mean,), (img_std,))
                               ])

# Transformed data (to be used by the network)
train_data = datasets.MNIST(root=data_folder, train=True,  download = False, transform=transform_pipeline )
test_data =  datasets.MNIST(root=data_folder, train=False, download = False, transform=transform_pipeline)

Max data value: 255
mean: 0.1307; std: 0.3081


In [10]:
_, img_rows, img_cols = (train_raw.data.numpy().shape)
network_input_dim = img_rows * img_cols

BATCH_SIZE = 64
HIDDEN_NODES = (64,64)
LEARNING_RATE = lr=0.00011

# current execution
train_data_loaded , test_data_loaded = get_train_and_test_data_w_batch_size(BATCH_SIZE , train_data, test_data)

# model creation (use CPU: faster for small networks)
model = MNIST_MLP(network_input_dim, HIDDEN_NODES)

# create optimizer
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

### Try with Resnet

In [5]:
# this code has been adapted from the suggestions made by chatGPT version 3.5, searching for transfer learning

import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class ResNetForMNIST(nn.Module):
    def __init__(self, resnet):
        super(ResNetForMNIST, self).__init__()
        
        self.resnet =  models.resnet18(weights='ResNet18_Weights.DEFAULT')
        resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)
        
        self.freeze_all_layers_but_lastone()
        
        self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

        self.softmax = nn.Softmax(dim=1)  # Apply softmax along the dimension of classes
        
        
        
    def freeze_all_layers_but_lastone(self):
        
        for param in self.resnet.parameters():
            param.requires_grad = False
            resnet.fc.requires_grad = True

    def forward(self, x):
        logits = self.resnet(x)
        probabilities = self.softmax(logits)
        
        # Use torch.max to get the most likely class
        max_values, predicted_class = torch.max(probabilities, dim=1)
        return logits, probabilities, predicted_class

# Load the pre-trained ResNet-18 model
resnet =

# Modify the final classification layer to output raw scores (logits)
num_classes = 10


# Create the modified model
model = ResNetForMNIST(resnet)

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:
get_model_device(resnet)
train_loss_history, training_acc_hist = list(), list()
val_loss_history, val_acc_hist = list(), list()
num_train_samples = len(train_data_loaded.dataset)
num_val_samples = len(test_data_loaded.dataset)
best_test_accuracy = 0.0
best_net_weights = resnet.state_dict()

In [18]:
optimizer = optim.Adam(resnet.parameters(), lr= 0.00045)

In [19]:
device
import torch.nn.functional as F
criterion=F.cross_entropy

In [23]:
def get_third_output(x):
    return x[2]

In [26]:
model.train()

MNIST_MLP(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)

In [28]:
net_to_output_fcn = get_third_output

In [31]:
def process_all_data_from_loader_n_get_metrics(data_loader, optimizer, model, processing_fcn,
                                               criterion=F.cross_entropy,
                                               net_to_output_fcn = lambda x:x):
    device = get_model_device(model)

    running_loss = 0.0
    train_correct = 0  # init number of correctly classified items

    for data in data_loader:
        inputs, labels = data

        # dump data to device
        inputs, labels = inputs.to(device), labels.to(device)

        current_loss, correctly_classified_items = batch_process(inputs, labels, optimizer, model,
                                                                 processing_fcn,
                                                                 criterion,
                                                                 net_to_output_fcn)  # batch_process
        running_loss += current_loss.item()
        train_correct += correctly_classified_items

    return running_loss, train_correct


In [32]:
X = [data for data in train_data_loaded ]

In [38]:
input_test = X[0][0]
label_test = X[0][1]

In [40]:
input_test, label_test = input_test.to(device), label_test.to(device)

In [54]:
(model(input_test))

tensor([[-2.2670, -2.1981, -2.2263, -2.3381, -2.3836, -2.2411, -2.3347, -2.4210,
         -2.3858, -2.2567],
        [-2.2945, -2.3270, -2.3775, -2.1820, -2.2927, -2.2525, -2.3156, -2.3626,
         -2.4053, -2.2373],
        [-2.3361, -2.2488, -2.2427, -2.2083, -2.3087, -2.2281, -2.4018, -2.3801,
         -2.4131, -2.2832],
        [-2.2925, -2.2266, -2.2810, -2.2683, -2.3880, -2.2299, -2.3820, -2.3735,
         -2.3916, -2.2158],
        [-2.3065, -2.2183, -2.3992, -2.2082, -2.2588, -2.2558, -2.3172, -2.4017,
         -2.3982, -2.2863],
        [-2.2090, -2.2905, -2.2793, -2.2633, -2.3507, -2.2668, -2.2557, -2.4023,
         -2.4131, -2.3146],
        [-2.2777, -2.1889, -2.3724, -2.1299, -2.3002, -2.2013, -2.3832, -2.4798,
         -2.5497, -2.2226],
        [-2.2821, -2.2206, -2.2665, -2.1827, -2.2915, -2.2159, -2.4140, -2.3624,
         -2.4961, -2.3355],
        [-2.2494, -2.2041, -2.3290, -2.1570, -2.3675, -2.3063, -2.4111, -2.4131,
         -2.4657, -2.1754],
        [-2.3690, -

In [52]:
label_test.shape

torch.Size([64])

In [30]:
training_loss, num_corrected_samples_train = \
            train_net_on_data(train_data_loaded, optimizer, resnet, criterion, get_third_output)

RuntimeError: size mismatch (got input: [10], target: [64])