In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd

# Create dataset object for dataloader for XOR dataset

In [17]:
class Iris_dataset(torch.utils.data.Dataset):
    def __init__(self, data, labels):
        self.x = torch.tensor(data, dtype=torch.float32)
        self.y = torch.tensor(labels, dtype=torch.long)
        
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [18]:
data = pd.read_csv('../data/IRIS.csv')
df_norm = data[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].apply(lambda x: (x - x.min()) / (x.max() - x.min()))
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
target= labelencoder.fit_transform(data['species'])
target = pd.DataFrame(target)
target.rename(columns = {0:'species'}, inplace = True)
df = pd.concat([df_norm, target], axis=1)

In [61]:
class ConstructNet(nn.Module):
    def __init__(self, DNA, loss_fn, input_size, output_size, outputlayer):
        print(DNA)
        super(ConstructNet, self).__init__()
        self.DNA = DNA
        self.input_size = input_size
        self.output_size = output_size
        self.layers = []
        self.loss_fn = loss_fn
        
        # Append first layer
        self.layers.append(nn.Linear(self.input_size, self.DNA[0][1]))
        self.layers.append(nn.ReLU())
        
        for i in range(1, len(self.DNA)):
            print(self.DNA[i])
            if self.DNA[i][0] == "D":
                # The input size is the output of the last layer
                tmp_input_size = self.last_layer_output_size()
                self.layers.append(nn.Linear(tmp_input_size, self.DNA[i][1]))
                self.layers.append(nn.ReLU())
            if self.DNA[i][0] == "R":
                self.layers.append(nn.Dropout(self.DNA[i][1]))
        
        # Append the output layer        
        self.layers.append(nn.Linear(self.layers[-2].out_features, self.output_size))
        #if self.output_size > 1: 
        #    self.layers.append(nn.Softmax(dim=1))
        #else:
        #    self.layers.append(nn.Sigmoid())
        self.layers.append(outputlayer)
        self.net = nn.Sequential(*self.layers)
        
    def forward(self, x):
        return self.net(x)
    '''
    Based on the layers created, find the output size of the last dense layer
    '''
    def last_layer_output_size(self):
        for layer in self.layers[::-1]:
            if isinstance(layer, nn.Linear):
                return layer.out_features
            
    def train_net(self, device, train_loader, optimizer, epoch, log_interval, print_stats):
        self.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = self.forward(data)
            loss = self.loss_fn(output, target)
            loss.backward()
            optimizer.step()
            if batch_idx % log_interval == 0 and print_stats:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))


    def test(self, device, test_loader, print_stats=False):
        self.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = self.forward(data)
                #print(f"output: {output}, target: {target}")
                test_loss += self.loss_fn.forward(output, target).item()  # sum up batch loss
                pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
                correct += pred.eq(target.view_as(pred)).sum().item()
        test_loss /= len(test_loader.dataset)
        accuracy = correct / len(test_loader.dataset)
        if print_stats:
            print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
                test_loss, correct, len(test_loader.dataset),
                100. * accuracy))
        return test_loss, accuracy
        
    def count_parameters(self):
        # https://discuss.pytorch.org/t/how-do-i-check-the-number-of-parameters-of-a-model/4325/7
        return sum(p.numel() for p in self.net.parameters() if p.requires_grad)        
   

In [20]:
DNA_list = [
    [["D",1024],["D",512],["D",128]],
    [["D",1024],["R",0.2],["D",512],["R",0.2],["D",128]],
    [["D",64],["D", 64]]
]

# Initialise hyperparameters

In [21]:
batch_size = 16
use_mps = False and torch.backends.mps.is_available()
use_cuda = True and torch.cuda.is_available()
test_batch_size = 16
epochs = 30
lr = 0.02
gamma = 0.7
seed = 1
log_interval = 100
save_model = False

if use_cuda:
    device = torch.device("cuda")
elif use_mps:
    device = torch.device("mps")
else:
    device = torch.device("cpu")
    
train_kwargs = {'batch_size': batch_size}
test_kwargs = {'batch_size': test_batch_size}

In [22]:
train_dataset = Iris_dataset(df.values[:,:4], df['species'])
test_dataset = Iris_dataset(df.values[:,:4], df['species'])
train_loader = torch.utils.data.DataLoader(train_dataset,shuffle=True,**train_kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset,shuffle=True, **test_kwargs)

In [23]:
outputlayer = nn.Softmax(dim=1)
generations = [[]]

for DNA in DNA_list:
    inv_net = ConstructNet(DNA, nn.CrossEntropyLoss(), input_size=4, output_size=3, outputlayer=outputlayer)
    print(inv_net)
    optimizer = optim.Adam(inv_net.parameters(), lr=lr)

    for epoch in range(1, epochs + 1):
        inv_net.train_net(device, train_loader, optimizer, epoch, log_interval, print_stats=True)
        inv_net.test(device, test_loader)
    if save_model:
        torch.save(inv_net.state_dict(), "mnist_cnn.pt")
    generations[0].append(inv_net)

ConstructNet(
  (loss_fn): CrossEntropyLoss()
  (net): Sequential(
    (0): Linear(in_features=4, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=3, bias=True)
    (7): Softmax(dim=1)
  )
)
ConstructNet(
  (loss_fn): CrossEntropyLoss()
  (net): Sequential(
    (0): Linear(in_features=4, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=512, out_features=128, bias=True)
    (7): ReLU()
    (8): Linear(in_features=128, out_features=3, bias=True)
    (9): Softmax(dim=1)
  )
)
ConstructNet(
  (loss_fn): CrossEntropyLoss()
  (net): Sequential(
    (0): Linear(in_features=4, out_features=64, bias=True)
    (1): ReLU(

In [24]:
generations

[[ConstructNet(
    (loss_fn): CrossEntropyLoss()
    (net): Sequential(
      (0): Linear(in_features=4, out_features=1024, bias=True)
      (1): ReLU()
      (2): Linear(in_features=1024, out_features=512, bias=True)
      (3): ReLU()
      (4): Linear(in_features=512, out_features=128, bias=True)
      (5): ReLU()
      (6): Linear(in_features=128, out_features=3, bias=True)
      (7): Softmax(dim=1)
    )
  ),
  ConstructNet(
    (loss_fn): CrossEntropyLoss()
    (net): Sequential(
      (0): Linear(in_features=4, out_features=1024, bias=True)
      (1): ReLU()
      (2): Dropout(p=0.2, inplace=False)
      (3): Linear(in_features=1024, out_features=512, bias=True)
      (4): ReLU()
      (5): Dropout(p=0.2, inplace=False)
      (6): Linear(in_features=512, out_features=128, bias=True)
      (7): ReLU()
      (8): Linear(in_features=128, out_features=3, bias=True)
      (9): Softmax(dim=1)
    )
  ),
  ConstructNet(
    (loss_fn): CrossEntropyLoss()
    (net): Sequential(
      (0)

# Start of GA code

In [103]:
import random
import numpy as np
class GA():
    def __init__(self, generation_length, population_size, initial_size, initial_depth, 
                 lossFn, input_size, output_size, outputLayer, device, train_dataloader, test_dataloader, optimizerFn, epoch):
        self.generation_length = generation_length
        self.population_size = population_size
        self.initial = True # No previous individuals can be used for mutations
        self.initial_size = initial_size
        self.initial_depth = initial_depth
        
        self.lossFn = lossFn
        self.optimizerFn = optimizerFn
        self.input_size = input_size
        self.output_size = output_size
        self.outputLayer = outputLayer
        
        self.device = device
        self.train_dataloader = train_dataloader
        self.test_dataloader = test_dataloader
        self.epoch = epoch
        self.population = []
        
        #self.generate_population()
    
    def auto_evolve(self):
        for i in range(self.generation_length):
            print(f"Generation #{i}")
            self.generate_population()
            self.train_population()
            self.evaluate_population()
            self.calculate_fitness()
            
    def generate_population(self):
        tmp_list = []
        for index,item in enumerate(self.population):
            tmp_list.append({
                'genome': item['genome'],
                'model': item['model'],
            })
        self.population = tmp_list
        del tmp_list
        
        if self.initial:
            for i in range(self.population_size):
                genome = self.generate_individual()
                self.population.append({'genome': genome})
        else:
            # Delete 25% of the population
            del self.population[:int(np.floor(self.population_size/4))]
            # Generate new individuals to have the same population size
            for i in range(self.population_size-len(self.population)):
                genome = self.generate_individual()
                self.population.append({'genome': genome})
        # Create models for each individual        
        for index,individual in enumerate(self.population):
            model = ConstructNet(individual['genome'], self.lossFn, self.input_size, self.output_size, self.outputLayer)
            individual['model'] = model
            self.population[index] = individual
        self.initial = False
            
    def generate_individual(self):
        genome = []
        if self.initial:
            for chromosome in range(random.randint(np.floor(self.initial_depth/2), np.ceil(self.initial_depth*2))):
                layer_size = random.randint(np.floor(self.initial_size/2), np.ceil(self.initial_size*2))
                genome.append(["D",layer_size])
        else:
            # Get the DNA of one of the best 5 individuals, that are at the end of the list
            random_index = random.randint(len(self.population)-5, len(self.population)-1)
            #print(random_index)
            original_genome = self.population[random_index]['genome'].copy()
            genome = self.mutate_genome(original_genome)
        #print(genome)
        return genome
    
    def mutate_genome(self, genome):
        # example genome: [["D",1024],["D",512],["D",128]]
        # possible mutations: Add new larger layer, add new smaller layer, change layer size, remove layer
        # 1. Add new larger layer
        if random.random() < 0.25:
            # select random layer
            layer_index = random.randint(0, len(genome)-1)
            new_layer_size = np.ceil(genome[layer_index][1] * (1 + random.random())).astype(int)
            genome.insert(layer_index, ["D",new_layer_size])
        # 2. Add new smaller layer
        if random.random() < 0.25:
            # select random layer
            layer_index = random.randint(0, len(genome)-1)
            new_layer_size = np.floor(genome[layer_index][1] * (1 - random.random())).astype(int)
            genome.insert(layer_index, ["D",new_layer_size])
        # 3. Change layer size
        if random.random() < 0.25:
            # select random layer
            layer_index = random.randint(0, len(genome)-1)
            new_layer_size = np.round(genome[layer_index][1] * random.random()).astype(int)
            genome[layer_index][1] = new_layer_size
        # 4. Remove layer
        if random.random() < 0.25 and len(genome) > 1:
            # select random layer
            layer_index = random.randint(0, len(genome)-1)
            del genome[layer_index]
        return genome
    
    def print_population(self, top_n=False):
        if not top_n:
            top_n = len(self.population)
        for index,individual in enumerate(self.population[:top_n]):
            print(f"Individual #{index}: {individual}")
            
    def train_population(self):
        print(self.population)
        for index,individual in enumerate(self.population):
            print(f"Training individual #{index}")
            if self.optimizerFn == "Adam":
                optimizer = torch.optim.Adam(individual['model'].parameters(), lr=0.01)
            individual['model'].train_net(self.device,self.train_dataloader, optimizer,self.epoch, 100, False)
    
    def evaluate_population(self):
        for index,individual in enumerate(self.population):
            print(f"Evaluating individual #{index}")
            loss, accuracy = individual['model'].test(self.device, self.test_dataloader)
            individual['id'] = index
            individual['loss'] = loss
            individual['accuracy'] = accuracy
            individual['parameters'] = individual['model'].count_parameters()
            self.population[index] = individual
        
    def calculate_fitness(self):
        # Create copy of list using only a few columns
        tmp_list = []
        for index,item in enumerate(self.population):
            tmp_list.append({
                'id' : item['id'],
                'accuracy': item['accuracy'],
                'parameters': item['parameters'],
                'fitness': 0
            })
        accuracy_list = sorted(tmp_list, key=lambda d: d['accuracy'])
        # Adding more emphasis on accuracy
        for index,item in enumerate(accuracy_list):
            accuracy_list[index]['fitness'] += index*2
        # Reverse sort, smaller is better
        parameter_size_list = sorted(accuracy_list, key=lambda d: d['parameters'], reverse=True)
        for index,item in enumerate(parameter_size_list):
            parameter_size_list[index]['fitness'] += index
        sorted_by_fitness = sorted(parameter_size_list, key=lambda d: d['fitness'])
        for fitness in sorted_by_fitness:
            for index,individual in enumerate(self.population):
                if individual['id'] == fitness['id']:
                    individual['fitness'] = fitness['fitness']
            self.population[index] = individual
        # Sort population
        self.population = sorted(self.population, key=lambda d: d['fitness'])
        #print(self.population)
        
            

In [104]:
train_dataset = Iris_dataset(df.values[:,:4], df['species'])
test_dataset = Iris_dataset(df.values[:,:4], df['species'])
train_loader = torch.utils.data.DataLoader(train_dataset,shuffle=True,**train_kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset,shuffle=True, **test_kwargs)

generation = GA(generation_length=10,population_size=50,initial_size=256,initial_depth=5, lossFn=nn.CrossEntropyLoss(), input_size=4, output_size=3, outputLayer=nn.Softmax(dim=1), device=device, train_dataloader=train_loader, test_dataloader=test_loader, optimizerFn="Adam", epoch=100  )
generation.generate_population()
generation.print_population()

[['D', 331], ['D', 237], ['D', 493], ['D', 250], ['D', 343], ['D', 220], ['D', 436], ['D', 365], ['D', 341]]
['D', 237]
['D', 493]
['D', 250]
['D', 343]
['D', 220]
['D', 436]
['D', 365]
['D', 341]
[['D', 419], ['D', 413], ['D', 462], ['D', 451], ['D', 397], ['D', 274], ['D', 173]]
['D', 413]
['D', 462]
['D', 451]
['D', 397]
['D', 274]
['D', 173]
[['D', 246], ['D', 296], ['D', 245]]
['D', 296]
['D', 245]
[['D', 169], ['D', 172], ['D', 184], ['D', 498], ['D', 385], ['D', 186], ['D', 130]]
['D', 172]
['D', 184]
['D', 498]
['D', 385]
['D', 186]
['D', 130]
[['D', 211], ['D', 227], ['D', 337], ['D', 232]]
['D', 227]
['D', 337]
['D', 232]
[['D', 201], ['D', 143], ['D', 152], ['D', 168], ['D', 300], ['D', 357]]
['D', 143]
['D', 152]
['D', 168]
['D', 300]
['D', 357]
[['D', 470], ['D', 243], ['D', 423], ['D', 395], ['D', 330], ['D', 266], ['D', 290], ['D', 145], ['D', 470]]
['D', 243]
['D', 423]
['D', 395]
['D', 330]
['D', 266]
['D', 290]
['D', 145]
['D', 470]
[['D', 316], ['D', 432], ['D', 348]

In [105]:
generation.train_population()

[{'genome': [['D', 331], ['D', 237], ['D', 493], ['D', 250], ['D', 343], ['D', 220], ['D', 436], ['D', 365], ['D', 341]], 'model': ConstructNet(
  (loss_fn): CrossEntropyLoss()
  (net): Sequential(
    (0): Linear(in_features=4, out_features=331, bias=True)
    (1): ReLU()
    (2): Linear(in_features=331, out_features=237, bias=True)
    (3): ReLU()
    (4): Linear(in_features=237, out_features=493, bias=True)
    (5): ReLU()
    (6): Linear(in_features=493, out_features=250, bias=True)
    (7): ReLU()
    (8): Linear(in_features=250, out_features=343, bias=True)
    (9): ReLU()
    (10): Linear(in_features=343, out_features=220, bias=True)
    (11): ReLU()
    (12): Linear(in_features=220, out_features=436, bias=True)
    (13): ReLU()
    (14): Linear(in_features=436, out_features=365, bias=True)
    (15): ReLU()
    (16): Linear(in_features=365, out_features=341, bias=True)
    (17): ReLU()
    (18): Linear(in_features=341, out_features=3, bias=True)
    (19): Softmax(dim=1)
  )
)}, 

In [106]:
generation.evaluate_population()

Evaluating individual #0
Evaluating individual #1
Evaluating individual #2
Evaluating individual #3
Evaluating individual #4
Evaluating individual #5
Evaluating individual #6
Evaluating individual #7
Evaluating individual #8
Evaluating individual #9
Evaluating individual #10
Evaluating individual #11
Evaluating individual #12
Evaluating individual #13
Evaluating individual #14
Evaluating individual #15
Evaluating individual #16
Evaluating individual #17
Evaluating individual #18
Evaluating individual #19
Evaluating individual #20
Evaluating individual #21
Evaluating individual #22
Evaluating individual #23
Evaluating individual #24
Evaluating individual #25
Evaluating individual #26
Evaluating individual #27
Evaluating individual #28
Evaluating individual #29
Evaluating individual #30
Evaluating individual #31
Evaluating individual #32
Evaluating individual #33
Evaluating individual #34
Evaluating individual #35
Evaluating individual #36
Evaluating individual #37
Evaluating individual 

In [107]:
generation.print_population()

Individual #0: {'genome': [['D', 331], ['D', 237], ['D', 493], ['D', 250], ['D', 343], ['D', 220], ['D', 436], ['D', 365], ['D', 341]], 'model': ConstructNet(
  (loss_fn): CrossEntropyLoss()
  (net): Sequential(
    (0): Linear(in_features=4, out_features=331, bias=True)
    (1): ReLU()
    (2): Linear(in_features=331, out_features=237, bias=True)
    (3): ReLU()
    (4): Linear(in_features=237, out_features=493, bias=True)
    (5): ReLU()
    (6): Linear(in_features=493, out_features=250, bias=True)
    (7): ReLU()
    (8): Linear(in_features=250, out_features=343, bias=True)
    (9): ReLU()
    (10): Linear(in_features=343, out_features=220, bias=True)
    (11): ReLU()
    (12): Linear(in_features=220, out_features=436, bias=True)
    (13): ReLU()
    (14): Linear(in_features=436, out_features=365, bias=True)
    (15): ReLU()
    (16): Linear(in_features=365, out_features=341, bias=True)
    (17): ReLU()
    (18): Linear(in_features=341, out_features=3, bias=True)
    (19): Softmax(d

In [108]:
generation.calculate_fitness()

# Now use the automated evolve

In [109]:
train_dataset = Iris_dataset(df.values[:,:4], df['species'])
test_dataset = Iris_dataset(df.values[:,:4], df['species'])
train_loader = torch.utils.data.DataLoader(train_dataset,shuffle=True,**train_kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset,shuffle=True, **test_kwargs)

generation = GA(generation_length=10,population_size=50,initial_size=256,initial_depth=5, lossFn=nn.CrossEntropyLoss(), input_size=4, output_size=3, outputLayer=nn.Softmax(dim=1), device=device, train_dataloader=train_loader, test_dataloader=test_loader, optimizerFn="Adam", epoch=100  )
generation.auto_evolve()

Generation #0
[['D', 143], ['D', 512], ['D', 291], ['D', 467], ['D', 480], ['D', 474], ['D', 448], ['D', 480], ['D', 366], ['D', 251]]
['D', 512]
['D', 291]
['D', 467]
['D', 480]
['D', 474]
['D', 448]
['D', 480]
['D', 366]
['D', 251]
[['D', 405], ['D', 215], ['D', 398], ['D', 416], ['D', 182], ['D', 418], ['D', 153], ['D', 134], ['D', 267]]
['D', 215]
['D', 398]
['D', 416]
['D', 182]
['D', 418]
['D', 153]
['D', 134]
['D', 267]
[['D', 362], ['D', 428], ['D', 264], ['D', 311], ['D', 375], ['D', 144], ['D', 308], ['D', 208]]
['D', 428]
['D', 264]
['D', 311]
['D', 375]
['D', 144]
['D', 308]
['D', 208]
[['D', 338], ['D', 239], ['D', 281], ['D', 507], ['D', 441], ['D', 419], ['D', 227], ['D', 236], ['D', 326], ['D', 475]]
['D', 239]
['D', 281]
['D', 507]
['D', 441]
['D', 419]
['D', 227]
['D', 236]
['D', 326]
['D', 475]
[['D', 338], ['D', 157], ['D', 155], ['D', 413], ['D', 336], ['D', 277], ['D', 161], ['D', 237], ['D', 477], ['D', 398]]
['D', 157]
['D', 155]
['D', 413]
['D', 336]
['D', 277]



Training individual #3
Training individual #4
Training individual #5
Training individual #6
Training individual #7
Training individual #8
Training individual #9
Training individual #10
Training individual #11
Training individual #12
Training individual #13
Training individual #14
Training individual #15
Training individual #16
Training individual #17
Training individual #18
Training individual #19
Training individual #20
Training individual #21
Training individual #22
Training individual #23
Training individual #24
Training individual #25
Training individual #26
Training individual #27
Training individual #28
Training individual #29
Training individual #30
Training individual #31
Training individual #32
Training individual #33
Training individual #34
Training individual #35
Training individual #36
Training individual #37
Training individual #38
Training individual #39
Training individual #40
Training individual #41
Training individual #42
Training individual #43
Training individual #44

# Best architectures

In [110]:
generation.print_population()

Individual #0: {'genome': [['D', 166], ['D', 142], ['D', 1]], 'model': ConstructNet(
  (loss_fn): CrossEntropyLoss()
  (net): Sequential(
    (0): Linear(in_features=4, out_features=166, bias=True)
    (1): ReLU()
    (2): Linear(in_features=166, out_features=142, bias=True)
    (3): ReLU()
    (4): Linear(in_features=142, out_features=1, bias=True)
    (5): ReLU()
    (6): Linear(in_features=1, out_features=3, bias=True)
    (7): Softmax(dim=1)
  )
), 'id': 2, 'loss': 0.07327378352483113, 'accuracy': 0.3333333333333333, 'parameters': 24693, 'fitness': 10}
Individual #1: {'genome': [['D', 177], ['D', 183], ['D', 1], ['D', 72], ['D', 43], ['D', 26]], 'model': ConstructNet(
  (loss_fn): CrossEntropyLoss()
  (net): Sequential(
    (0): Linear(in_features=4, out_features=177, bias=True)
    (1): ReLU()
    (2): Linear(in_features=177, out_features=183, bias=True)
    (3): ReLU()
    (4): Linear(in_features=183, out_features=1, bias=True)
    (5): ReLU()
    (6): Linear(in_features=1, out_f