Define the neural network architecture, load the dataset and define train, test and freeze functions

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Pick the compute backend: CUDA is preferred, then Apple's MPS backend,
# and the CPU is the fallback when neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

# Define the neural network architecture

def conv_block(in_channels, out_channels, pool=False):
    """Build a Conv -> BatchNorm -> ReLU stage, optionally ending in max pooling.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by the convolution.
        pool: when true, a ``MaxPool2d(2)`` layer is appended after the ReLU.

    Returns:
        An ``nn.Sequential`` holding the layers in execution order.
    """
    # 3x3 kernel with padding 1 keeps the spatial size unchanged
    stages = (
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    )
    if pool:
        stages = stages + (nn.MaxPool2d(2),)
    return nn.Sequential(*stages)

class Net(nn.Module):
    """Residual convolutional classifier that outputs 10 scores per image.

    The layers are, in order: two conv blocks (3 -> 64 -> 128 channels), a
    two-block residual branch at 128 channels, two more conv blocks
    (128 -> 256 -> 512 channels), a second residual branch at 512 channels,
    a 4x4 max pooling and a final linear layer ``out`` (512 -> 10).
    """

    def __init__(self):
        super().__init__()

        # entry stage; conv2 halves the spatial resolution through its pooling
        self.conv1 = conv_block(in_channels=3, out_channels=64)
        self.conv2 = conv_block(in_channels=64, out_channels=128, pool=True)

        # first residual branch (channel count unchanged so it can be added back)
        self.res1 = nn.Sequential(
            conv_block(in_channels=128, out_channels=128),
            conv_block(in_channels=128, out_channels=128),
        )

        # two more pooled stages, each halving the spatial resolution
        self.conv3 = conv_block(in_channels=128, out_channels=256, pool=True)
        self.conv4 = conv_block(in_channels=256, out_channels=512, pool=True)

        # second residual branch
        self.res2 = nn.Sequential(
            conv_block(in_channels=512, out_channels=512),
            conv_block(in_channels=512, out_channels=512),
        )

        # classification head; expects exactly 512 flattened features
        self.out = nn.Linear(512, 10)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        shallow = self.conv2(self.conv1(x))
        shallow = self.res1(shallow) + shallow  # skip connection

        deep = self.conv4(self.conv3(shallow))
        deep = self.res2(deep) + deep  # skip connection

        # The linear head needs 512 values per sample, i.e. a 1x1 map after
        # this pooling (which is what a 32x32 input yields).
        pooled = F.max_pool2d(deep, kernel_size=4)
        flat = pooled.flatten(start_dim=1)  # keep the batch dimension
        return self.out(flat)

# Load the dataset

import torchvision
import torchvision.transforms as transforms

MINI_BATCH_SIZE = 128

# The only preprocessing is the conversion of each image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# CIFAR-10 training split, stored under ./data (download requested)
trainset = torchvision.datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform
)
# reshuffle data at every epoch
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=MINI_BATCH_SIZE, shuffle=True, num_workers=2
)

# CIFAR-10 test split, iterated in a fixed order
testset = torchvision.datasets.CIFAR10(
    root="./data", train=False, download=True, transform=transform
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=MINI_BATCH_SIZE, shuffle=False, num_workers=2
)

# human-readable class names
classes = (
    "plane", "car", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck",
)

# Function definitions to train, test, and freeze the parameters of the neural network

import matplotlib.pyplot as plt
import numpy as np

def train_nn(net: nn.Module, epochs: int, optimizer: torch.optim.Optimizer):
    """Train the neural network using backpropagation with cross entropy as the loss function.

    Every epoch iterates once over the module-level ``trainloader``. After
    each epoch the accuracy is measured with ``test_nn`` and, once all epochs
    are done, accuracy and summed loss per epoch are plotted on twin axes.

    Args:
        net: the network to train; it is updated in place.
        epochs: number of passes over the training data.
        optimizer: optimiser that already holds the parameters to update.
    """
    print('Initialising training ...')
    print(f'- Epochs: {epochs}')
    print(f'- Mini batch size: {MINI_BATCH_SIZE}')
    print(f'- Optimiser: {optimizer}')
    print(f'- Loss function: {F.cross_entropy.__name__}')

    running_loss_track = []
    accuracy_track = []

    # loop over the dataset multiple times
    for epoch in range(epochs):
        # Make sure layers such as BatchNorm are in training mode, regardless
        # of the mode the caller (or an evaluation step) left the network in.
        net.train()
        running_loss = 0

        # loop over the dataset and get mini-batch
        for images, labels in trainloader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()  # zero the parameter gradients

            preds = net(images)  # forward mini-batch

            loss = F.cross_entropy(preds, labels)  # calculate loss
            loss.backward()  # calculate gradients with respect to each weight
            optimizer.step()  # update weights

            # .item() is read once per step; the sum over the epoch is tracked
            running_loss += loss.item()

        accuracy = test_nn(net=net, verbose=False)
        print(f'\nEpoch {epoch} finished -- Running loss {running_loss} -- Accuracy {accuracy}')

        # track
        running_loss_track.append(running_loss)
        accuracy_track.append(accuracy)

    # plot accuracy (left axis, blue) and running loss (right axis, red)
    fig, ax1 = plt.subplots()

    ax1.set_xlabel('Iterations over entire dataset (Epoch)')

    ax1.set_ylabel('Accuracy', color='b')
    ax1.plot(np.array(accuracy_track), '--b', label='Accuracy', linewidth=0.5)

    ax2 = ax1.twinx()
    ax2.set_ylabel('Running loss per epoch', color='r')
    ax2.plot(np.array(running_loss_track), '--r', label='Loss per epoch', linewidth=0.5)

    fig.tight_layout()
    fig.legend()
    plt.show()

def test_nn(net: nn.Module, verbose: bool):
    """Test the neural network on the module-level ``testloader``.

    The network is switched to evaluation mode for the duration of the test
    (so BatchNorm uses its running statistics and does not update them) and
    is put back into the mode it was in before the call.

    Args:
        net: the network to evaluate.
        verbose: when true, print the number of correct predictions, the
            total number of samples and the accuracy.

    Returns:
        The accuracy as an integer percentage (floor of 100 * correct / total),
        or 0 when the loader yields no samples.
    """
    correct = 0
    total = 0

    was_training = net.training
    net.eval()
    try:
        # since we're not training, we don't need to calculate the gradients for our outputs
        with torch.no_grad():
            for images, labels in testloader:
                images = images.to(device)
                labels = labels.to(device)

                # calculate outputs by running images through the network
                outputs = net(images)
                # the class with the highest energy is what we choose as prediction
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # restore the caller's mode so a surrounding training loop is unaffected
        net.train(was_training)

    # integer percentage; guard against an empty loader
    accuracy = 100 * correct // total if total else 0

    if verbose:
        print('Testing on 10,000 test images ...')
        print(f'- Correct: {correct}')
        print(f'- Total: {total}')
        print(f'- Accuracy: {accuracy}')

    return accuracy

def freeze_parameters(net: nn.Module):
    """Freeze all the parameters except the last layer and randomise the last layer.

    Every parameter of ``net`` gets ``requires_grad = False``; the parameters
    of ``net.out`` are then re-enabled and overwritten with values drawn
    uniformly from [0, 1).

    Args:
        net: a module exposing its last layer as the attribute ``out``; it is
            modified in place.
    """
    # freeze all the parameters in the NN
    for param in net.parameters():
        param.requires_grad = False

    # unfreeze all the parameters from the last layer and randomise the weights
    for param in net.out.parameters():
        param.requires_grad = True
        # Create the new values on the parameter's own device and with its own
        # dtype, so the layer stays consistent with the rest of the network.
        param.data = torch.rand(param.size(), device=param.device, dtype=param.dtype)

Load the pre-trained neural network model and test it to check the accuracy

In [None]:
PATH = './nn-models/cifar10-nn-model'

# load the pretrained NN model
net = Net()
# map_location remaps the stored tensors onto the device selected for this
# session, so a checkpoint saved on another device can still be loaded.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
net.load_state_dict(torch.load(PATH, map_location=device))
net.to(device=device)

test_nn(net=net, verbose=True)

Optimise the parameters of the last layer with a genetic algorithm (GA)

In [None]:
import array
import random
import json
import numpy as np
from deap import base
from deap.benchmarks.tools import diversity, convergence, hypervolume
from deap import creator
from deap import tools
import matplotlib.pyplot as plt

# store all the training dataset in a single batch:
# ALL_DATA[0] holds every image and ALL_DATA[1] every label, concatenated
# along the batch dimension in the order the loader produced them.
# NOTE: this keeps the whole training set in memory; a consumer should feed it
# to the network in chunks if one forward pass over all samples does not fit.
_image_batches = []
_label_batches = []
for _images, _labels in trainloader:
    _image_batches.append(_images)
    _label_batches.append(_labels)
ALL_DATA = [torch.cat(_image_batches), torch.cat(_label_batches)]
del _image_batches, _label_batches

# count the number of dimensions of the last layer (weights and biases)
N_DIMENSION = sum(param.numel() for param in net.out.parameters())

LOW_BOUND = -1.0   # smallest decoded parameter value
HIGH_BOUND = 1.0   # upper end of the decoded parameter range
N_BITS = 8         # bits used to encode one parameter
N_GENERATIONS = 250
MU = 100           # population size
CX_PB = 0.9        # probability that a pair of individuals is crossed over
UNIFORM_CX_PB = 0.5  # per-bit exchange probability of the uniform crossover
MUTATE_PB = 0.1    # probability that an individual is mutated
MUTATE_FLIP_PB = 1.0 / (N_DIMENSION * N_BITS)  # per-bit flip probability (one bit per individual on average)
ELITISM = 5        # number of best individuals selected by selBest each generation

def decode(individual):
    """Translate a bit-string individual into a list of real-valued parameters.

    The individual is read as ``N_DIMENSION`` consecutive groups of ``N_BITS``
    bits. Each group is interpreted as an unsigned base-2 integer (first bit
    most significant) and mapped linearly onto the range starting at
    ``LOW_BOUND``. Because the integer is divided by ``2**N_BITS``, the
    largest decoded value stays one step below ``HIGH_BOUND``.

    Args:
        individual: sequence of 0/1 values of length ``N_DIMENSION * N_BITS``.

    Returns:
        A list with ``N_DIMENSION`` floats.
    """
    span = HIGH_BOUND - LOW_BOUND
    levels = 2**N_BITS

    decoded = []
    for start in range(0, N_DIMENSION * N_BITS, N_BITS):
        gene = individual[start:start + N_BITS]
        # join the bits into a base-2 literal and parse it
        as_int = int(''.join(str(bit) for bit in gene), 2)
        decoded.append(LOW_BOUND + span * as_int / levels)

    return decoded

def calculate_fitness(individual):
    """Return the cross-entropy loss obtained with ``individual`` as last layer.

    The individual is decoded to real values which are written into
    ``net.out`` (first the weights, then the biases). The images in
    ``ALL_DATA[0]`` are then classified and compared to the labels in
    ``ALL_DATA[1]``.

    The network is evaluated in evaluation mode, so the forward passes do not
    modify BatchNorm running statistics; the previous mode is restored
    afterwards. The data are processed in chunks of ``MINI_BATCH_SIZE`` to
    bound memory use; the returned value is the mean loss over all samples.

    Args:
        individual: bit string accepted by ``decode``.

    Returns:
        A one-element tuple ``(loss,)`` as expected by DEAP.
    """
    layer = net.out
    # Sizes come from the layer itself instead of hard-coded numbers, so they
    # always match the architecture of ``net``.
    n_weight = layer.weight.numel()
    n_bias = layer.bias.numel()

    # put the parameters into the neural network
    parameters = decode(individual=individual)
    parameters = torch.as_tensor(parameters, dtype=torch.float32, device=device)

    all_images = ALL_DATA[0]
    all_labels = ALL_DATA[1]

    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            # overwrite the existing parameters in place rather than creating
            # new Parameter objects on every evaluation
            layer.weight.copy_(parameters[:n_weight].reshape(layer.weight.shape))
            layer.bias.copy_(parameters[n_weight:n_weight + n_bias])

            # go over the data once, chunk by chunk
            loss_sum = 0.0
            chunks = zip(all_images.split(MINI_BATCH_SIZE), all_labels.split(MINI_BATCH_SIZE))
            for images, labels in chunks:
                images = images.to(device)
                labels = labels.to(device)

                preds = net(images)
                # summed so that chunks of different size are weighted correctly
                loss_sum += F.cross_entropy(preds, labels, reduction='sum').item()
    finally:
        net.train(was_training)

    return loss_sum / all_labels.size(0),

# Single-objective fitness with weight -1.0, i.e. the loss is minimised.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
# An individual is a plain list of bits carrying a FitnessMin.
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# --- construction of genes, individuals and populations ---
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register(
    "individual",
    tools.initRepeat,
    creator.Individual,
    toolbox.attr_bool,
    N_BITS * N_DIMENSION,  # N_BITS bits for each of the N_DIMENSION parameters
)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# --- genetic operators ---
toolbox.register("select", tools.selTournament, fit_attr='fitness')
toolbox.register("mutate", tools.mutFlipBit, indpb=MUTATE_FLIP_PB)
toolbox.register("crossover", tools.cxUniform, indpb=UNIFORM_CX_PB)
toolbox.register("evaluate", calculate_fitness)

def ga():
    """Run the genetic algorithm and return the final population.

    A random population of ``MU`` individuals is created and evaluated. In
    every generation the ``ELITISM`` best individuals are copied unchanged
    into the next population, while the remaining slots are filled by binary
    tournament selection followed by crossover (probability ``CX_PB`` per
    pair) and mutation (probability ``MUTATE_PB`` per individual).

    Returns:
        A tuple ``(pop, gen_performance)``: the final population and a list
        with the best (lowest) fitness value of each generation.
    """
    # generate initial random population of individuals (parameters)
    pop = toolbox.population(n=MU)

    # evaluate the entire population
    for ind, fit in zip(pop, map(toolbox.evaluate, pop)):
        ind.fitness.values = fit

    # track the performance of each generation
    gen_performance = []

    # begin the generational process
    for gen in range(1, N_GENERATIONS):
        print(f'==== Generation {gen} ====')

        # Elitism: the best individuals survive unmodified. They are kept
        # apart from the offspring so that crossover and mutation cannot
        # destroy them; their fitness stays valid and is not re-evaluated.
        elites = list(map(toolbox.clone, tools.selBest(pop, ELITISM)))

        # select the rest of the next generation by tournaments of size 2
        offspring = toolbox.select(pop, len(pop) - ELITISM, 2)
        # clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))

        # crossover make pairs of all (even, odd) in offspring
        for ind1, ind2 in zip(offspring[::2], offspring[1::2]):
            if random.random() <= CX_PB:
                toolbox.crossover(ind1, ind2)
                del ind1.fitness.values
                del ind2.fitness.values

        # mutation
        for mutant in offspring:
            if random.random() <= MUTATE_PB:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        for ind, fit in zip(invalid_ind, map(toolbox.evaluate, invalid_ind)):
            ind.fitness.values = fit

        # population is entirely replaced by the elites and the offspring
        pop[:] = elites + offspring

        # track the best individual at this generation
        gen_performance.append(min(ind.fitness.values[0] for ind in pop))

    return pop, gen_performance


pop, gen_performance = ga()

In [None]:
# fittest member of the final population according to selBest
top_ranked = tools.selBest(pop, 1)
best_individual = top_ranked[0]

# best fitness value recorded for each generation, drawn in red
best_per_generation = np.array(gen_performance)
plt.plot(best_per_generation, 'r')
plt.show()

In [None]:
# put best parameters back into the neural network
# The individual is a bit string, so it is decoded to real values first.
parameters = torch.as_tensor(decode(best_individual), dtype=torch.float32, device=device)

# Sizes are taken from the layer definition: the weights come first,
# followed by one bias per output.
n_out = net.out.out_features
n_in = net.out.in_features
n_weight = n_out * n_in
net.out.weight = torch.nn.Parameter(data=parameters[:n_weight].reshape(n_out, n_in))
net.out.bias = torch.nn.Parameter(data=parameters[n_weight:n_weight + n_out])

# test the neural network
test_nn(net=net, verbose=True)