In [None]:
from torchvision import datasets
from torchvision.transforms import ToTensor
import torch
import matplotlib
import matplotlib.pyplot as plt


# VERIFICA PRESENZA DI CUDA
Con nvidia-smi nel prompt si vede anche la versione di Cuda

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

# DOWNLOADING DATASET

In [None]:
train_data= datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor(),)

test_data = datasets.FashionMNIST(root='data', train=False, download=True, transform=ToTensor(),)

# DATA LOADER

In [None]:
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor


def handle_dataset():
    train_data = datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor(), )
    test_data = datasets.FashionMNIST(root='data', train=False, download=True, transform=ToTensor(), )

    labels_map = { # Classes need to be predicted
        0: 'T-shirt',
        1: 'Trouser',
        2: 'Pullover',
        3: 'Dress',
        4: 'Coat',
        5: 'Sandal',
        6: 'Shirt',
        7: 'Sneaker',
        8: 'Bag',
        9: 'Ankle Boot',
    }
    batch_size = 128  # For processing simultaneously 128 images at every weigth update

    train_dataloader = DataLoader(train_data, batch_size=batch_size,
                                  shuffle=True)  # For every iteration, dataset is divided into gropus of 128 samples. Shuffle helps generalizing the model

    test_dataloader = DataLoader(test_data, batch_size=batch_size)  # Same as train_dataloader but for the test

    return train_dataloader, test_dataloader, labels_map


# DEFYINING THE INITIALIZATION KERNEL AND WEIGHTS



# Bulding the Neural Net
Here is were the different net classes (CNNs) are declared. We'll divide the architectures into 2 sets: A1 and A2.
CNNs of A1 share the same architectures, but they differ in the initialization/training of the kernels, and the same happens for A2. 
We have 3 types of the initialization schemas:
-  HF;
- HT;
- DT

NB: WHEN YOU HAVE DECIDED TO USE THE DESIRED ARCHITECTURE, CHANGE THE NAME OF THE CLASS WITH "Net"

## About the kernel initialization

The dataset contains grayscale images, wich have 1 channel and a dimension of 28x28 pixels.
The initial kernel will be of size 3, padding 1 and stride 1 and with 5 output channels (5 kernels)

For understanding how will be the size of the image after applying a convolution layer, we have to see this formula:
$$O = \frac{(I - K + 2P)}{S} + 1$$

Where *I* is the size of the input, *K* is the size of the kernel, *P* is the padding and *S* is the stride
This formula returns the shape of the image after 1 convolution layer.

## SET A1
### HF

In [None]:
from torch import nn
import torch

#
class A1HF(nn.Module):
    def __init__(self, classes):
        super(A1HF, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1)  # Convolution layer
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(5 * 14 * 14, 100)  # first number is to decoding the 3d tensor vector into a 1D dimensional vector, 100 is the number of output neurons of fc1. I chosed 100 because is a good tradeoff between speed and leaning capacity
        self.fc2 = nn.Linear(100, len(classes))  # Final fully connected layer. This is the layer that makes the preictions.
        self.relu = nn.ReLU()  # Activation Function
        self.flatten = nn.Flatten()


    def forward(self, x):
        x = self.pool1(self.relu(self.conv1(x)))

        x = self.flatten(x)  # x becomes a 2D vector

        x = self.relu(self.fc1(x))  # actv. function applied on the first fully connected layer of output

        x = self.fc2(x)  # Prediction
        return x




### HT

In [None]:
from torch import nn
import torch

#A1_HT
class A1HT(nn.Module):
    def __init__(self, classes):
        super(A1HT, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1)  # Convolution layer
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(5 * 14 * 14, 100)  # first number is to decoding the 3d tensor vector into a 1D dimensional vector, 100 is the number of output neurons of fc1. I chosed 100 because is a good tradeoff between speed and leaning capacity
        self.fc2 = nn.Linear(100, len(classes))  # Final fully connected layer. This is the layer that makes the preictions.
        self.relu = nn.ReLU()  # Activation Function
        self.flatten = nn.Flatten()

    def set_initial_kernels(self):
        # Definito by hand i primi 5 kernels
        kernel1 = torch.tensor([[0, 1, 0],
                                [1, 1, 1],
                                [0, 1, 0]], dtype=torch.float32)

        kernel2 = torch.tensor([[1, 0, 1],
                                [0, 1, 0],
                                [1, 0, 1]], dtype=torch.float32)

        kernel3 = torch.tensor([[1, 1, 1],
                                [0, 0, 0],
                                [1, 1, 1]], dtype=torch.float32)

        kernel4 = torch.tensor([[0, 1, 0],
                                [0, 0, 0],
                                [1, 0, 1]], dtype=torch.float32)

        kernel5 = torch.tensor([[1, 1, 0],
                                [1, 0, 0],
                                [0, 0, 0]], dtype=torch.float32)
        kernels = [kernel1, kernel2, kernel3, kernel4, kernel5]  # list of

        with torch.no_grad():
            # I pesi di conv1 hanno shape [5, 1, 3, 3] (5 kernels, 1 canale, dimensione 3x3)
            # Assegno ciascun pattern al corrispondente kernel per l'unico canale in ingresso.
            for k, kernel in enumerate(kernels):
                self.conv1.weight[k, 0] = kernel



    def forward(self, x):
        x = self.pool1(self.relu(self.conv1(x)))

        x = self.flatten(x)  # x becomes a 2D vector

        x = self.relu(self.fc1(x))  # actv. function applied on the first fully connected layer of output

        x = self.fc2(x)  # Prediction
        return x




### DT

In [None]:
from torch import nn
import torch

#A1_DT
class A1DT(nn.Module):
    def __init__(self, classes):
        super(A1DT, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1)  # Convolution layer
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(5 * 14 * 14, 100)  # first number is to decoding the 3d tensor vector into a 1D dimensional vector, 100 is the number of output neurons of fc1. I chosed 100 because is a good tradeoff between speed and leaning capacity
        self.fc2 = nn.Linear(100, len(classes))  # Final fully connected layer. This is the layer that makes the preictions.
        self.relu = nn.ReLU()  # Activation Function
        self.flatten = nn.Flatten()


    def forward(self, x):
        x = self.pool1(self.relu(self.conv1(x)))

        x = self.flatten(x)  # x becomes a 2D vector

        x = self.relu(self.fc1(x))  # actv. function applied on the first fully connected layer of output

        x = self.fc2(x)  # Prediction
        return x




## Set A2
### HF

In [None]:
from torch import nn
import torch

#A2_HF
class A2HF(nn.Module):
    def __init__(self, classes):
        super(A2HF, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1)  # Convolution layer
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(5 * 14 * 14, 100)  # first number is to decoding the 3d tensor vector into a 1D dimensional vector, 100 is the number of output neurons of fc1. I chosed 100 because is a good tradeoff between speed and leaning capacity
        self.fc2 = nn.Linear(100, len(classes))  # Final fully connected layer. This is the layer that makes the preictions.
        self.relu = nn.ReLU()  # Activation Function
        self.flatten = nn.Flatten()

    def set_initial_kernels(self):
        # Definito by hand i primi 5 kernels
        kernel1 = torch.tensor([[0, 1, 0],
                                [1, 1, 1],
                                [0, 1, 0]], dtype=torch.float32)

        kernel2 = torch.tensor([[1, 0, 1],
                                [0, 1, 0],
                                [1, 0, 1]], dtype=torch.float32)

        kernel3 = torch.tensor([[1, 1, 1],
                                [0, 0, 0],
                                [1, 1, 1]], dtype=torch.float32)

        kernel4 = torch.tensor([[0, 1, 0],
                                [0, 0, 0],
                                [1, 0, 1]], dtype=torch.float32)

        kernel5 = torch.tensor([[1, 1, 0],
                                [1, 0, 0],
                                [0, 0, 0]], dtype=torch.float32)
        kernels = [kernel1, kernel2, kernel3, kernel4, kernel5]  # list of

        with torch.no_grad():
            # I pesi di conv1 hanno shape [5, 1, 3, 3] (5 kernels, 1 canale, dimensione 3x3)
            # Assegno ciascun pattern al corrispondente kernel per l'unico canale in ingresso.
            for k, kernel in enumerate(kernels):
                self.conv1.weight[k, 0] = kernel



    def forward(self, x):
        x = self.pool1(self.relu(self.conv1(x)))

        x = self.flatten(x)  # x becomes a 2D vector

        x = self.relu(self.fc1(x))  # actv. function applied on the first fully connected layer of output

        x = self.fc2(x)  # Prediction
        return x




### HT

In [None]:
from torch import nn
import torch

# A2_HT
class A2HT(nn.Module):
    def __init__(self, classes):
        super(A2HT, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1)  # Convolution layer
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(5 * 14 * 14, 100)  # first number is to decoding the 3d tensor vector into a 1D dimensional vector, 100 is the number of output neurons of fc1. I chosed 100 because is a good tradeoff between speed and leaning capacity
        self.fc2 = nn.Linear(100, len(classes))  # Final fully connected layer. This is the layer that makes the preictions.
        self.relu = nn.ReLU()  # Activation Function
        self.flatten = nn.Flatten()

    def set_initial_kernels(self):
        # Definito by hand i primi 5 kernels
        kernel1 = torch.tensor([[0, 1, 0],
                                [1, 1, 1],
                                [0, 1, 0]], dtype=torch.float32)

        kernel2 = torch.tensor([[1, 0, 1],
                                [0, 1, 0],
                                [1, 0, 1]], dtype=torch.float32)

        kernel3 = torch.tensor([[1, 1, 1],
                                [0, 0, 0],
                                [1, 1, 1]], dtype=torch.float32)

        kernel4 = torch.tensor([[0, 1, 0],
                                [0, 0, 0],
                                [1, 0, 1]], dtype=torch.float32)

        kernel5 = torch.tensor([[1, 1, 0],
                                [1, 0, 0],
                                [0, 0, 0]], dtype=torch.float32)

        kernels = [kernel1, kernel2, kernel3, kernel4, kernel5]  # list of

        for k, kernel in enumerate(kernels):
            self.conv1.weight[k, 0] = kernel

    def forward(self, x):
        x = self.pool1(self.relu(self.conv1(x)))

        x = self.flatten(x)  # x becomes a 2D vector

        x = self.relu(self.fc1(x))  # actv. function applied on the first fully connected layer of output

        x = self.fc2(x)  # Prediction
        return x




### DT

In [None]:
from torch import nn
import torch

#A2_DT
class A2DT(nn.Module):
    def __init__(self, classes):
        super(A2DT, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1)  # Convolution layer
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(5 * 14 * 14, 100)  # first number is to decoding the 3d tensor vector into a 1D dimensional vector, 100 is the number of output neurons of fc1. I chosed 100 because is a good tradeoff between speed and leaning capacity
        self.fc2 = nn.Linear(100, len(classes))  # Final fully connected layer. This is the layer that makes the preictions.
        self.relu = nn.ReLU()  # Activation Function
        self.flatten = nn.Flatten()


    def forward(self, x):
        x = self.pool1(self.relu(self.conv1(x)))

        x = self.flatten(x)  # x becomes a 2D vector

        x = self.relu(self.fc1(x))  # actv. function applied on the first fully connected layer of output

        x = self.fc2(x)  # Prediction
        return x




# TRAIN_LOOP

In [None]:
import torch
import os

def train_loop(dataloader, model, loss_fn, optimizer, epoch, device):
    size = len(dataloader.dataset)
    print(f"Training set of size: {size}")

    for batch, (X, y) in enumerate(dataloader):  # (X = input, y = target)
        X, y = X.to(device), y.to(device)  # Setting of 2 architectures

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()  # Loss function calculating the zero-gradient descent
        loss.backward()
        optimizer.step()

        if batch % 1000 == 0:  # every 1000 batch it prints the loss
            loss, current = loss.item(), (batch + 1) * len(X)
            current_loss = current / size
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

    # torch save model with torch.save()
    checkpoint = {
        'epoch': epoch,  # l'epoca corrente
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }

    # Definisci il percorso della cartella dei checkpoint
    checkpoint_dir = r'C:\Users\stefa\Desktop\DNN2025\DNNStefano\DNN\CheckpointsNotebook'

    # Se la cartella non esiste, la creiamo
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Costruiamo il percorso completo del file checkpoint
    checkpoint_path = os.path.join(checkpoint_dir, f'epoch_{epoch}_Model_CNN_A1_DT.pt')

    torch.save(checkpoint, checkpoint_path)




# TEST_LOOP

In [None]:
import torch


def test_loop(dataloader, model, loss_fn, device):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return 100 * correct, test_loss


# START

In [None]:


def start(epochs, iteratore, device, train_loader, test_loader):
    for iterator in range(iteratore, epochs):
        print(f"Epoch {iterator + 1}\n-------------------------------")
        train_loop(train_dataloader, model, loss_fn, optimizer, iterator + 1, device)
        accuracy, loss = test_loop(test_dataloader, model, loss_fn, device)
        accuracies.append(accuracy)
        losses.append(loss)
    print("Done!")


if __name__ == "__main__":
    print("START")
    train_dataloader, test_dataloader, labels_map = handle_dataset()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = A1HT(labels_map)  # Dichiarazione di un oggetto di tipo Net
    model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    model.set_initial_kernels() # check if the class has this method. Otherwise, comment it
    learning_rate = 1e-4
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    epochs = 20
    accuracies = []
    losses = []
    iterator = 0
    start(epochs, iterator, device, train_dataloader, test_dataloader)

    epochs = range(1, len(accuracies) + 1)

    # comment if it creates problems
    matplotlib.use('TkAgg')  # Oppure 'Qt5Agg' se hai PyQt5 installato

    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)  # Primo subplot
    plt.plot(epochs, accuracies, marker='o', label="Accuracy")
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title(f'Accuracy Over Epochs')
    plt.grid(True)
    plt.legend()

    plt.subplot(1, 2, 2)  # Secondo subplot
    plt.plot(epochs, losses, marker='o', label="Loss")
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(f'Loss Over Epochs')
    plt.grid(True)
    plt.legend()

    plt.tight_layout()
    plt.show()


# Per ottenere informazioni di un checkpoint

In [None]:
checkpoint_path = r'C:\Users\stefa\Desktop\DNN_2025\Architetture\Checkpoints\SetA1\A1DT\epoch_5_Model_CNN_A1DT.pth'
checkpoint = torch.load(checkpoint_path)
checkpoint

In [27]:
import torch

# Carica i checkpoint
checkpoint1 = torch.load("Checkpoints/SetA1/A1DT/epoch_1_Model_CNN_A1DT.pth")
checkpoint2 = torch.load("PrevTraining/Checkpoints/SetA1/A1DT/epoch_1_Model_CNN_A1DT.pth")

# Controlla le chiavi disponibili
print("Checkpoint A1DT Keys:", checkpoint1.keys())
print("Checkpoint A2HT Keys:", checkpoint2.keys())

# Confronta i pesi del primo layer
layer_name = "conv1.weight"  # Modifica con il nome corretto del primo layer nel tuo modello
weights_A1DT = checkpoint1["model_state_dict"][layer_name]
weights_A2HT = checkpoint2["model_state_dict"][layer_name]
print(f"weights_A1HT.shape: {weights_A1DT}")
print(f"weights_A2HT.shape: {weights_A2HT}")

# Verifica la coerenza
if torch.allclose(weights_A1DT, weights_A2HT, atol=1e-6):
    print("I pesi iniziali del primo layer sono coerenti tra le reti!")
else:
    print("Attenzione! I pesi iniziali sono diversi.")


Checkpoint A1DT Keys: dict_keys(['model_state_dict'])
Checkpoint A2HT Keys: dict_keys(['model_state_dict'])
weights_A1HT.shape: tensor([[[[-0.0154,  0.2141,  0.3755],
          [-0.1181,  0.1458,  0.4804],
          [-0.0644,  0.4549,  0.0154]]],


        [[[ 0.1725,  0.3713, -0.0484],
          [ 0.0533,  0.0843, -0.3922],
          [ 0.0318,  0.3630,  0.0139]]],


        [[[ 0.3294,  0.0549,  0.0895],
          [ 0.2067,  0.1893,  0.3953],
          [ 0.3433,  0.1022, -0.2267]]],


        [[[ 0.2402,  0.1688, -0.3027],
          [-0.3424, -0.4309, -0.2804],
          [-0.3145, -0.0862, -0.0420]]],


        [[[-0.3060,  0.3026, -0.3550],
          [ 0.2466, -0.4196, -0.1897],
          [ 0.4299, -0.2667,  0.3686]]]], device='cuda:0')
weights_A2HT.shape: tensor([[[[-0.3907, -0.3157, -0.2261],
          [-0.4974, -0.1876, -0.2189],
          [-0.2164, -0.2368,  0.5098]]],


        [[[-0.4348, -0.4456,  0.4629],
          [ 0.1217,  0.0720, -0.0369],
          [ 0.4530,  0.3005, -0.