In [1]:
import torch
import torch.nn as nn  # the neural network library of pytorch
from load_dataset import load_MNIST, reduce_MNIST_dataset
from torch.nn import AdaptiveAvgPool2d

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        # first convolutionnal layer
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=4,
            kernel_size=4,
            stride=2,
            padding=3
        )

        # first pooling layer
        self.pool1 = nn.MaxPool2d(kernel_size = 2, stride=2)

        # second convolutionnal layer
        self.conv2 = nn.Conv2d(
            in_channels=4,
            out_channels=4,
            kernel_size=4,
            stride=2,
            padding=3
        )

        #second pooling layer
        self.pool2 = nn.MaxPool2d(kernel_size = 2, stride=2)

        # Flatten
        self.flat = nn.Flatten()

        # fully connected layer, output 10 classes
        self.fc = nn.Linear(16, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        # x = F.relu(x)
        x = self.conv2(x)
        x = self.pool2(x)
        # x = F.relu(x)
        x = self.flat(x)
        output = self.fc(x)
        return output    # return x for visualization

In [5]:
batch_size = 10  # the number of examples per batch
train_loader, test_loader = load_MNIST(batch_size=batch_size)
scala = 60
reduced_loader = reduce_MNIST_dataset(train_loader, scala)
I = 16
device = torch.device("cpu")

conv_network = CNN()
learning_rate = 1e-2  # the scale of the changes applied to the weights
optimizer = torch.optim.Adam(conv_network.parameters(), lr=learning_rate)
criterion = torch.nn.CrossEntropyLoss()
# test_loss, test_accuracy = eval_net(conv_network, test_loader, criterion)
# print(f'Evaluation on test set: Loss = {test_loss:.6f}, accuracy = {test_accuracy*100:.4f} %')

loss_list = []
accuracy_list = []

def train_net(network, train_loader, criterion, optimizer):
    network.train()  # put in train mode: we will modify the weights of the network
    train_loss = 0  # initialize the loss
    train_accuracy = 0  # initialize the accuracy

    # loop on the batches in the train dataset
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()  # important step to reset gradients to zero
        new_size = I
        adaptive_avg_pool = AdaptiveAvgPool2d((new_size, new_size))
        data = adaptive_avg_pool(data).to(device)
        # Run the network and compute the loss
        output = network(data)  # we run the network on the data
        loss = criterion(output,
                         target)  # we compare output to the target and compute the loss, using the chosen loss function
        train_loss += loss.item()  # we increment the total train loss

        # !!!Here we do the learning!!!
        loss.backward()  # backpropagation: the gradients are automatically computed by the autograd
        optimizer.step()  # specific optimization rule for changing the weights (stochastic gradient descent, Adam etc)
        # and change weighs

        # Getting the prediction of the network and computing the accuracy
        pred = output.argmax(dim=1, keepdim=True)  # the class chosen by the network is the highest output
        acc = pred.eq(target.view_as(pred)).sum().item()  # the accuracy is the proportion of correct classes
        train_accuracy += acc  # increment accuracy of whole test set

    train_accuracy /= len(train_loader.dataset)  # compute mean accuracy
    train_loss /= (batch_idx + 1)  # mean loss
    return train_loss, train_accuracy

for epoch in range(10):
    train_loss, train_accuracy = train_net(conv_network, train_loader, criterion, optimizer)
    loss_list.append(train_loss)
    accuracy_list.append(train_accuracy*100)
    
    print(f'Epoch {epoch}: Loss = {train_loss:.6f}, accuracy = {train_accuracy*100:.4f} %')

Epoch 0: Loss = 1.908315, accuracy = 31.3000 %
Epoch 1: Loss = 1.015189, accuracy = 66.2000 %
Epoch 2: Loss = 0.702265, accuracy = 78.2000 %
Epoch 3: Loss = 0.620638, accuracy = 80.1000 %
Epoch 4: Loss = 0.518471, accuracy = 83.8000 %
Epoch 5: Loss = 0.445572, accuracy = 86.1000 %
Epoch 6: Loss = 0.447635, accuracy = 86.1000 %
Epoch 7: Loss = 0.385859, accuracy = 87.7000 %
Epoch 8: Loss = 0.363162, accuracy = 89.7000 %
Epoch 9: Loss = 0.355748, accuracy = 88.8000 %


In [6]:
total_params = sum(p.numel() for p in conv_network.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 498


In [7]:
def eval_net(network, test_loader, criterion):
    network.eval()  # put in eval mode: we will just run, not modify the network
    test_loss = 0  # initialize the loss
    test_accuracy = 0  # initialize the accuracy

    with torch.no_grad():  # careful, we do not care about gradients here
        # loop on the batches in the test dataset
        for batch_idx, (data, target) in enumerate(test_loader):
            new_size = I
            adaptive_avg_pool = AdaptiveAvgPool2d((new_size, new_size))
            data = adaptive_avg_pool(data).to(device)
            # Run the network and compute the loss
            output = network(data)  # run the network on the test data
            loss = criterion(output,
                             target)  # compare the output to the target and compute the loss, using the chosen loss function
            test_loss += loss.item()  # increment the total test loss

            # Getting the prediction of the network and computing the accuracy
            pred = output.argmax(dim=1, keepdim=True)  # the class chosen by the network is the highest output
            acc = pred.eq(target.view_as(pred)).sum().item()  # the accuracy is the proportion of correct classes
            test_accuracy += acc  # increment accuracy of whole test set

    test_accuracy /= len(test_loader.dataset)  # compute mean accuracy
    test_loss /= (batch_idx + 1)  # mean loss
    return test_loss, test_accuracy

test_loss, test_accuracy = eval_net(conv_network, test_loader, criterion)
print(f'Evaluation on test set: Loss = {test_loss:.6f}, accuracy = {test_accuracy*100:.4f} %')

Evaluation on test set: Loss = 0.669293, accuracy = 80.2700 %
