In [1]:
import torch
import torch.nn as nn  # the neural network library of pytorch
from torch.nn import AdaptiveAvgPool2d
from torch.optim.lr_scheduler import ExponentialLR
from src.load_dataset import load_mnist, load_fashion_mnist, load_medmnist

## Define CNN

In [None]:
class CNN(nn.Module):
    """Tiny two-stage convolutional classifier for single-channel images.

    Pipeline: conv -> pool -> conv -> pool -> flatten -> linear (10 outputs).
    Both pooling layers are ``AvgPool2d`` with ``divisor_override=1``, i.e. they
    sum each 2x2 window instead of averaging it.

    The linear head takes 7 input features, so the second pooling stage has to
    leave exactly one value per channel. This holds for 16x16 inputs, whose
    spatial size goes 16 -> 8 -> 4 -> 2 -> 1 through the four stages.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 7 channels, then 2x2 sum-pooling.
        self.conv1 = nn.Conv2d(1, 7, kernel_size=4, stride=2, padding=1)
        self.pool1 = nn.AvgPool2d(2, stride=2, divisor_override=1)

        # Stage 2: 7 -> 7 channels, then 2x2 sum-pooling.
        self.conv2 = nn.Conv2d(7, 7, kernel_size=4, stride=2, padding=1)
        self.pool2 = nn.AvgPool2d(2, stride=2, divisor_override=1)

        # Classification head: flatten to (batch, 7) and map to 10 class scores.
        self.flat = nn.Flatten()
        self.fc = nn.Linear(7, 10)

    def forward(self, x):
        """Run ``x`` through every stage in order and return the 10 class scores."""
        stages = (self.conv1, self.pool1, self.conv2, self.pool2, self.flat, self.fc)
        for stage in stages:
            x = stage(x)
        return x

## Hyperparameters and training

In [4]:
# Seed torch's global RNG before the model is built so that the weight
# initialisation is repeatable under "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED)

batch_size = 10  # the number of examples per batch
class_set = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]  # labels handed to the loader as the class filter
I = 16  # side length the images are resized to before entering the network
device = torch.device("cpu")
network = CNN().to(device)  # keep the model on the same device the batches are moved to
learning_rate = 1e-2*(0.66)  # the scale of the changes applied to the weights
optimizer = torch.optim.Adam(network.parameters(), lr=learning_rate)
scheduler = ExponentialLR(optimizer, gamma=0.9)  # lr is multiplied by 0.9 on every scheduler.step()
criterion = torch.nn.CrossEntropyLoss()

train_loader, test_loader = load_mnist(class_set=class_set, train_dataset_number=2000, test_dataset_number=1000, batch_size=batch_size)
# Alternative dataset (swap with the line above to use it):
# train_loader, test_loader = load_fashion_mnist(class_set=class_set, train_dataset_number=2000, test_dataset_number=1000, batch_size=batch_size)

# Per-epoch training history, filled by the training loop.
loss_list = []
accuracy_list = []

def train_net(network, train_loader, criterion, optimizer):
    """Train ``network`` for one epoch and advance the learning-rate schedule.

    Each batch is resized to ``I`` x ``I`` with adaptive average pooling and its
    channels are summed into a single channel before being fed to the network.

    Relies on the notebook-level globals ``I`` (target image side), ``device``
    and ``scheduler``; ``scheduler.step()`` is called once, after the last batch.

    Args:
        network: model to optimise; it is switched to train mode here.
        train_loader: iterable yielding ``(data, target)`` batches.
        criterion: loss, called as ``criterion(output, target)``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.

    Returns:
        ``(train_loss, train_accuracy)``: the loss averaged over batches and the
        fraction of processed samples that were classified correctly.

    Raises:
        ValueError: if ``train_loader`` yields no batches.
    """
    network.train()  # put in train mode: we will modify the weights of the network

    # The pooling module is stateless, so build it once instead of per batch.
    resize = AdaptiveAvgPool2d((I, I))

    total_loss = 0.0
    num_correct = 0
    num_samples = 0
    num_batches = 0

    for data, target in train_loader:
        optimizer.zero_grad()  # important step to reset gradients to zero

        data = resize(data).to(device)
        data = data.sum(dim=1, keepdim=True)  # collapse the channels into one
        # reshape(-1) flattens the labels to 1-D but, unlike squeeze(), keeps
        # the batch dimension when the batch holds a single sample.
        target = target.reshape(-1).to(device)

        output = network(data)
        loss = criterion(output, target)
        total_loss += loss.item()

        # Backpropagation followed by the optimizer's weight update.
        loss.backward()
        optimizer.step()

        # The predicted class is the index of the highest output.
        pred = output.argmax(dim=1)
        num_correct += (pred == target).sum().item()
        num_samples += target.numel()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("train_loader yielded no batches")

    scheduler.step()
    train_loss = total_loss / num_batches  # mean loss per batch
    train_accuracy = num_correct / num_samples  # fraction of correct predictions
    return train_loss, train_accuracy

# Count every scalar entry across the model's parameter tensors.
total_params = sum(param.numel() for param in network.parameters())
print(f"Number of parameters: {total_params}")

# Train for 40 epochs, recording the mean loss and the accuracy (in percent)
# of every epoch in the history lists.
for epoch in range(40):
    train_loss, train_accuracy = train_net(
        network=network,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
    )

    loss_list.append(train_loss)
    accuracy_list.append(train_accuracy*100)

    print(f'Epoch {epoch}: Loss = {train_loss:.6f}, accuracy = {train_accuracy*100:.4f} %')

Number of parameters: 990
Epoch 0: Loss = 1.076774, accuracy = 65.9000 %
Epoch 1: Loss = 0.663204, accuracy = 78.1000 %
Epoch 2: Loss = 0.593665, accuracy = 80.7000 %
Epoch 3: Loss = 0.548637, accuracy = 83.2500 %
Epoch 4: Loss = 0.508211, accuracy = 84.6500 %
Epoch 5: Loss = 0.475095, accuracy = 84.6000 %
Epoch 6: Loss = 0.448297, accuracy = 85.7000 %
Epoch 7: Loss = 0.431784, accuracy = 87.0500 %
Epoch 8: Loss = 0.415835, accuracy = 87.1500 %
Epoch 9: Loss = 0.401060, accuracy = 87.9000 %
Epoch 10: Loss = 0.402729, accuracy = 87.5000 %
Epoch 11: Loss = 0.383350, accuracy = 87.8000 %
Epoch 12: Loss = 0.368387, accuracy = 89.0500 %
Epoch 13: Loss = 0.359229, accuracy = 88.7000 %
Epoch 14: Loss = 0.358670, accuracy = 89.5500 %
Epoch 15: Loss = 0.348897, accuracy = 89.4500 %
Epoch 16: Loss = 0.341985, accuracy = 89.5000 %
Epoch 17: Loss = 0.332074, accuracy = 89.8000 %
Epoch 18: Loss = 0.326789, accuracy = 89.8000 %
Epoch 19: Loss = 0.323423, accuracy = 89.9000 %
Epoch 20: Loss = 0.31814

## Testing

In [5]:
def eval_net(network, test_loader, criterion):
    """Evaluate ``network`` on ``test_loader`` without updating any weights.

    Each batch is resized to ``I`` x ``I`` with adaptive average pooling and its
    channels are summed into a single channel before being fed to the network.

    Relies on the notebook-level globals ``I`` (target image side) and
    ``device``.

    Args:
        network: model to evaluate; it is switched to eval mode here.
        test_loader: iterable yielding ``(data, target)`` batches.
        criterion: loss, called as ``criterion(output, target)``.

    Returns:
        ``(test_loss, test_accuracy)``: the loss averaged over batches and the
        fraction of processed samples that were classified correctly.

    Raises:
        ValueError: if ``test_loader`` yields no batches.
    """
    network.eval()  # put in eval mode: we will just run, not modify the network

    # The pooling module is stateless, so build it once instead of per batch.
    resize = AdaptiveAvgPool2d((I, I))

    total_loss = 0.0
    num_correct = 0
    num_samples = 0
    num_batches = 0

    with torch.no_grad():  # gradients are not needed for evaluation
        for data, target in test_loader:
            data = resize(data).to(device)
            data = data.sum(dim=1, keepdim=True)  # collapse the channels into one
            # reshape(-1) flattens the labels to 1-D but, unlike squeeze(), keeps
            # the batch dimension when the batch holds a single sample.
            target = target.reshape(-1).to(device)

            output = network(data)
            total_loss += criterion(output, target).item()

            # The predicted class is the index of the highest output.
            pred = output.argmax(dim=1)
            num_correct += (pred == target).sum().item()
            num_samples += target.numel()
            num_batches += 1

    if num_batches == 0:
        raise ValueError("test_loader yielded no batches")

    test_loss = total_loss / num_batches  # mean loss per batch
    test_accuracy = num_correct / num_samples  # fraction of correct predictions
    return test_loss, test_accuracy

# Evaluate `network` on the test loader and report the mean loss and accuracy.
test_loss, test_accuracy = eval_net(
    network=network,
    test_loader=test_loader,
    criterion=criterion,
)
print(f'Evaluation on test set: Loss = {test_loss:.6f}, accuracy = {test_accuracy*100:.4f} %')

Evaluation on test set: Loss = 0.496585, accuracy = 84.1000 %
