Import the neural network architecture, the training data loader, and the train, test and freeze functions from `nn_utils`

Load the pre-trained neural network model and test it to check its accuracy

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from nn_utils import Net, DEVICE, TRAINLOADER, train_nn, test_nn, freeze_parameters

# release unused memory held by the CUDA caching allocator before loading
torch.cuda.empty_cache()

# location of the saved state dict of the pre-trained model
PATH = './nn-models/cifar10-nn-model'

# load the pretrained NN model
net = Net()
# map_location remaps the stored tensors onto DEVICE, so a checkpoint that was
# saved from a GPU can still be loaded when DEVICE is the CPU
net.load_state_dict(torch.load(PATH, map_location=DEVICE))
net.to(device=DEVICE)

# evaluate the loaded model with the nn_utils test helper
test_nn(net=net, verbose=True)

NSGA-II

In [None]:
import array
import random
import json
import numpy as np
from deap import base
from deap.benchmarks.tools import diversity, convergence, hypervolume
from deap import creator
from deap import tools
import matplotlib.pyplot as plt

# apply the nn_utils freeze helper to the network before the evolutionary search
# NOTE(review): presumably this disables gradient tracking - confirm in nn_utils
freeze_parameters(net=net)

# search-space bounds used for initialisation, crossover and mutation
LOW_BOUND = -0.1
HIGH_BOUND = 0.3

# evolutionary settings
N_GENERATIONS = 200   # upper limit of the generation counter in nsga_ii()
MU = 200              # population size
CX_PB = 0.9           # probability of mating a pair of offspring
MUTATE_PROB = 0.1     # probability of mutating an offspring

# walk the training loader once and merge every mini-batch into one big batch
image_batches, label_batches = [], []
for batch in TRAINLOADER:
    image_batches.append(batch[0])
    label_batches.append(batch[1])

ALL_IMAGES = torch.cat(image_batches)
ALL_LABELS = torch.cat(label_batches)

# total number of scalar parameters (weights + biases) in the last layer
N_DIMENSION = sum(layer_param.numel() for layer_param in net.out.parameters())

# loss function
def f1(individual):
    """Return the cross-entropy loss of the network using `individual` as its last layer.

    The flat sequence `individual` is split into the weight matrix and the bias
    vector of `net.out`, which are installed into the global `net` as a side
    effect. The loss is then computed on the first 1000 entries of
    ALL_IMAGES / ALL_LABELS (always the same samples, so the objective is
    deterministic for a given individual).

    Args:
        individual: flat sequence of floats laid out as
            [weights (row-major), biases] of `net.out`.

    Returns:
        The loss as a Python float.
    """
    # take the parameters from the individual and replace the last layer of the NN with them
    parameters = torch.as_tensor(individual, dtype=torch.float32, device=DEVICE)

    # derive the layout from the layer itself so it always agrees with
    # N_DIMENSION instead of hard-coding one particular layer size
    weight_shape = net.out.weight.shape
    n_weights = net.out.weight.numel()
    n_biases = net.out.bias.numel()

    net.out.weight = torch.nn.Parameter(data=parameters[:n_weights].reshape(weight_shape))
    net.out.bias = torch.nn.Parameter(data=parameters[n_weights:n_weights + n_biases])

    # no need to calculate the gradient
    with torch.no_grad():
        # fixed evaluation subset: the first 1000 training samples
        images = ALL_IMAGES[0:1000].to(device=DEVICE)
        labels = ALL_LABELS[0:1000].to(device=DEVICE)

        preds = net(images) # forward pass
        loss = F.cross_entropy(preds, labels) # calculate loss

    return loss.item()

# Gaussian regulariser (sum of the square of the weights)
def f2():
    """Return the sum of the squared values of every parameter of the global `net`.

    The value is computed from whatever parameters are currently installed in
    `net`, so the individual being scored must already have been written into
    the network before this is called.

    Returns:
        The sum as a Python float (0.0 if the network has no parameters).
    """
    # the builtin sum starts from 0, so an empty network yields 0 rather than failing
    total = sum(torch.sum(torch.square(param.detach())) for param in net.parameters())

    # plain float, consistent with the value returned by f1
    return float(total)

def obj(individual):
    """Evaluate both objectives for `individual` and return them as (loss, penalty)."""
    # f1 must run first: it writes the individual into the network,
    # and f2 then reads the network's current parameters
    loss = f1(individual=individual)
    penalty = f2()
    return (loss, penalty)

def uniform(low, up, size=None):
    """Draw a list of uniformly distributed random floats.

    If `low` and `up` are sequences, one value is drawn per (low, up) pair.
    Otherwise they are treated as scalar bounds and `size` values are drawn
    between them.
    """
    try:
        bounds = zip(low, up)
        return [random.uniform(lower, upper) for lower, upper in bounds]
    except TypeError:
        # scalar bounds: draw `size` values from the same interval
        return [random.uniform(low, up) for _ in range(size)]

# both objectives (loss, squared-weight sum) are minimised, hence two negative weights
creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0))
# an individual is a plain list of floats carrying a FitnessMin
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# generators: gene vector -> individual -> population
toolbox.register("attr_float", uniform, LOW_BOUND, HIGH_BOUND, N_DIMENSION)
toolbox.register(
    "individual",
    tools.initIterate,
    creator.Individual,
    toolbox.attr_float,
)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# evaluation, variation and selection operators
toolbox.register("evaluate", obj)
toolbox.register(
    "mate",
    tools.cxSimulatedBinaryBounded,
    low=LOW_BOUND,
    up=HIGH_BOUND,
    eta=20.0,
)
toolbox.register(
    "mutate",
    tools.mutPolynomialBounded,
    low=LOW_BOUND,
    up=HIGH_BOUND,
    eta=20.0,
    indpb=1.0/N_DIMENSION,
)
toolbox.register("select", tools.selNSGA2)

def nsga_ii():
    """Run the NSGA-II loop with the registered toolbox and return the final population.

    Starts from MU random individuals, then for each generation counter value
    in range(1, N_GENERATIONS) creates offspring by tournament selection,
    crossover and mutation, evaluates them, and keeps the best MU individuals
    out of parents + offspring.
    """

    def assign_fitness(individuals):
        # evaluate only the individuals whose fitness is missing or was invalidated
        stale = [ind for ind in individuals if not ind.fitness.valid]
        for ind, fit in zip(stale, toolbox.map(toolbox.evaluate, stale)):
            ind.fitness.values = fit

    # initial random population of parameter vectors
    population = toolbox.population(n=MU)
    assign_fitness(population)

    # selecting len(population) out of len(population) discards nobody; it is
    # done only so that every individual gets a crowding distance assigned
    population = toolbox.select(population, len(population))

    for generation in range(1, N_GENERATIONS):
        # tournament selection on dominance, then crowding distance; it needs
        # the crowding_dist attribute assigned by the previous select call
        parents = tools.selTournamentDCD(population, len(population))
        children = [toolbox.clone(parent) for parent in parents]

        # crossover on consecutive (even, odd) pairs of children
        for first, second in zip(children[::2], children[1::2]):
            if random.random() > CX_PB:
                continue
            toolbox.mate(first, second)
            del first.fitness.values
            del second.fitness.values

        # mutation
        for child in children:
            if random.random() > MUTATE_PROB:
                continue
            toolbox.mutate(child)
            del child.fitness.values

        # re-evaluate the children changed by crossover or mutation
        assign_fitness(children)

        # next generation: best MU of parents and children together
        population = toolbox.select(population + children, MU)

        print(f'generation {generation} finished')

    return population
        
# run the optimisation; `pop` is the final population returned by nsga_ii()
pop = nsga_ii()

In [None]:
# order the population in place by its fitness tuple (loss first, then squared-weight sum)
pop.sort(key=lambda individual: individual.fitness.values)

# one row per individual: column 0 = loss, column 1 = sum of the squared weights
front = np.array([individual.fitness.values for individual in pop])
loss_values, weight_penalties = front[:, 0], front[:, 1]

plt.scatter(loss_values, weight_penalties, c="b")
plt.axis("tight")
plt.xlabel('Loss function')
plt.ylabel('Sum of the squared weights')
plt.show()

In [None]:
# put best parameters back into the neural network
# pick the individual with the smallest (loss, penalty) fitness explicitly, so
# this cell does not depend on `pop` having been sorted by an earlier cell
best = min(pop, key=lambda ind: ind.fitness.values)
parameters = torch.as_tensor(best, dtype=torch.float32, device=DEVICE)

# split the flat vector according to the actual shape of the last layer
weight_shape = net.out.weight.shape
n_weights = net.out.weight.numel()
n_biases = net.out.bias.numel()
net.out.weight = torch.nn.Parameter(data=parameters[:n_weights].reshape(weight_shape))
net.out.bias = torch.nn.Parameter(data=parameters[n_weights:n_weights + n_biases])

# test the neural network
test_nn(net=net, verbose=True)