# GA-based optimization of MLP hyperparameters

### Imports

In [96]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import numpy as np
from random import shuffle
from operator import itemgetter

### Load and pre-process data

In [97]:
bc = load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(bc.data,bc.target)
pca = PCA(n_components=10).fit(bc.data)
xtr_new = pca.transform(x_train)
xte_new = pca.transform(x_test)
scaler = StandardScaler().fit(xtr_new)
xtr_new = scaler.transform(xtr_new)
xte_new = scaler.transform(xte_new)

### Create initial population

In [103]:
initial_population_size = 100
population = [np.random.randint(1,31,(5,)) for i in range(initial_population_size)]
print(population)

[array([24, 15, 19, 25,  9]), array([ 1, 27,  1,  2,  6]), array([21, 19, 20, 16, 15]), array([10, 23, 28,  7,  5]), array([30, 27,  6, 23, 27]), array([22,  6, 26,  1,  7]), array([ 4, 22,  5, 25, 20]), array([11, 14, 26, 25,  5]), array([13, 18, 28, 12, 12]), array([22,  6,  9, 14, 25]), array([13, 20, 29,  6,  7]), array([22, 26,  7,  6, 24]), array([ 2,  8, 26, 17,  3]), array([20, 29, 22, 30, 20]), array([16, 20, 21, 14, 18]), array([27, 17,  8,  1, 23]), array([ 3,  7, 24, 17, 27]), array([27, 16, 30, 14, 21]), array([21,  4, 22, 24, 29]), array([14, 15, 29, 23, 16]), array([18, 20, 15,  5, 11]), array([10, 28,  2, 17,  8]), array([23, 19, 22, 30,  6]), array([14,  7,  1,  2, 27]), array([11, 16,  6, 14, 23]), array([29, 11, 28,  2,  5]), array([16, 15,  8,  8, 22]), array([ 2, 19, 11,  2, 23]), array([27,  7,  4, 26,  2]), array([27, 20, 22, 16, 19]), array([11, 26, 11, 21, 11]), array([24,  2,  2,  8, 28]), array([11,  9, 18, 12, 14]), array([24,  5, 20, 25, 20]), array([21,  8

### Train and test generation

In [99]:
scores = []

for generation in range(initial_population_size/2):
    del scores[:]
    for sample in population:
        nn = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=sample, random_state=1)
        nn.fit(xtr_new,y_train)
        scores.append([nn.score(xte_new,y_test),sample])
        scores.sort(key=lambda x: x[0])
    
    if len(scores)==2: 
        break
    # create new population
    del scores[-2:]
    shuffle(scores)
    population = [model[1] for model in scores]
    new_population = []
    for index in range(len(population))[0:-1:2]:
        new_population.append(np.concatenate((population[index][:3],population[index+1][3:])))
        new_population.append(np.concatenate((population[index][3:],population[index+1][:3])))
    population = list(new_population)
    print("Generation %d out of %d: done!" %(generation+1,initial_population_size/2))

Generation 1 out of 25: done!
Generation 2 out of 25: done!
Generation 3 out of 25: done!
Generation 4 out of 25: done!
Generation 5 out of 25: done!
Generation 6 out of 25: done!
Generation 7 out of 25: done!
Generation 8 out of 25: done!
Generation 9 out of 25: done!
Generation 10 out of 25: done!
Generation 11 out of 25: done!
Generation 12 out of 25: done!
Generation 13 out of 25: done!
Generation 14 out of 25: done!
Generation 15 out of 25: done!
Generation 16 out of 25: done!
Generation 17 out of 25: done!
Generation 18 out of 25: done!
Generation 19 out of 25: done!
Generation 20 out of 25: done!
Generation 21 out of 25: done!
Generation 22 out of 25: done!
Generation 23 out of 25: done!
Generation 24 out of 25: done!


In [100]:
print(scores)

[[0.6153846153846154, array([12, 14,  1,  1, 15])], [0.9230769230769231, array([26, 26, 20,  2, 14])]]
