In [30]:
import numpy as np
import pandas as pd

data = pd.read_csv('MNIST.csv')
data = np.array(data)
np.random.shuffle(data)

m, n = data.shape

data_test = data[0:1000].T
Y_test = data_test[0]
X_test = data_test[1:n]
X_test = X_test / 255

data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
X_train = X_train / 255

_, m_train = X_train.shape

def ReLU(Z):
    return np.maximum(Z, 0)

def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A

class Neural_Network:
    def __init__(self, input_size, hidden_1_size, output_size, bias = 1):
        self.input_size = input_size
        self.hidden_1_size = hidden_1_size
        self.output_size = output_size

        self.fitness = 0

        self.W1 = np.random.rand(hidden_1_size, input_size) - 0.5
        self.b1 = np.random.rand(hidden_1_size, bias) - 0.5

        self.W2 = np.random.rand(output_size, hidden_1_size) - 0.5
        self.b2 = np.random.rand(output_size, bias) - 0.5

    def copy(self):
        new_nn = Neural_Network(0, 0, 0)
        new_nn.W1 = self.W1.copy()
        new_nn.b1 = self.b1.copy()
        new_nn.W2 = self.W2.copy()
        new_nn.b2 = self.b2.copy()
        return new_nn

    def forward_prop(self, X):
        self.Z1 = self.W1.dot(X) + self.b1
        self.A1 = ReLU(self.Z1)

        self.Z2 = self.W2.dot(self.A1)
        self.A2 = softmax(self.Z2)

        return self.A2


def fitness_function(network, X_train, Y_train):
    network.forward_prop(X_train)
    predictions = np.argmax(network.A2, axis=0)
    network.fitness = np.sum(predictions == Y_train) / Y_train.size
    return network.fitness

def init_pop(pop_size):
    population = []
    for _ in range(pop_size):
        network = Neural_Network(784, 10, 10)
        population.append(network)
    return population

def select_parents(population, X, y):
    fitness_scores = [fitness_function(network, X, y) for network in population]
    total_fitness = sum(fitness_scores)
    probabilities = [score / total_fitness for score in fitness_scores]
    selected_indices = np.random.choice(len(population), size = 2, p = probabilities, replace = False)
    parent_1 = population[selected_indices[0]].copy()
    parent_2 = population[selected_indices[1]].copy()
    return parent_1, parent_2

def crossover(parent_1, parent_2):
    child = Neural_Network(0, 0, 0)  # Initialise with dummy values

    # Crossover W1
    mask = np.random.rand(*parent_1.W1.shape) < 0.5
    child.W1 = np.where(mask, parent_1.W1, parent_2.W1)

    # Crossover b1
    mask = np.random.rand(*parent_1.b1.shape) < 0.5
    child.b1 = np.where(mask, parent_1.b1, parent_2.b1)

    # Crossover W2
    mask = np.random.rand(*parent_1.W2.shape) < 0.5
    child.W2 = np.where(mask, parent_1.W2, parent_2.W2)

    # Crossover b2
    mask = np.random.rand(*parent_1.b2.shape) < 0.5
    child.b2 = np.where(mask, parent_1.b2, parent_2.b2)

    return child

def mutate(network, mutation_rate):
    # Mutate W1
    mutation_mask = np.random.rand(*network.W1.shape) < mutation_rate
    network.W1 += mutation_mask * np.random.randn(*network.W1.shape) * 0.1

    # Mutate b1
    mutation_mask = np.random.rand(*network.b1.shape) < mutation_rate
    network.b1 += mutation_mask * np.random.randn(*network.b1.shape) * 0.1

    # Mutate W2
    mutation_mask = np.random.rand(*network.W2.shape) < mutation_rate
    network.W2 += mutation_mask * np.random.randn(*network.W2.shape) * 0.1

    # Mutate b2
    mutation_mask = np.random.rand(*network.b2.shape) < mutation_rate
    network.b2 += mutation_mask * np.random.randn(*network.b2.shape) * 0.1

    return network

In [36]:
pop_size = 20
mutation_rate = 0.05
generations = 150


population = init_pop(pop_size)
best_ever_network = max(population, key=lambda x: fitness_function(x, X_train, Y_train))

# for generation in range(generations):
generation = -1
while (best_ever_network.fitness < 0.9):
    generation += 1

    best_network = max(population, key=lambda x: fitness_function(x, X_train, Y_train))

    if (fitness_function(best_network, X_train, Y_train) > fitness_function(best_ever_network, X_train, Y_train)):
        best_ever_network = best_network

    print(generation, ' : ', round(fitness_function(best_network, X_train, Y_train) * 100, 4), '%')

    new_population = []

    for _ in range(pop_size // 2):
        parent_1, parent_2 = select_parents(population, X_train, Y_train)

        child_1 = crossover(parent_1, parent_2)
        child_1 = mutate(child_1, mutation_rate)

        child_2 = crossover(parent_1, parent_2)
        child_2 = mutate(child_2, mutation_rate)

        new_population.extend([child_1, child_2])
    population = new_population 


if (fitness_function(best_network, X_train, Y_train) > fitness_function(best_ever_network, X_train, Y_train)):
        best_ever_network = best_network

print('Test accuracy:', round(fitness_function(best_ever_network, X_test, Y_test) * 100, 4), '%')

0  :  15.0561 %
1  :  13.2024 %
2  :  18.6683 %
3  :  21.0073 %
4  :  17.1488 %
5  :  16.222 %
6  :  14.478 %
7  :  14.439 %
8  :  13.9073 %
9  :  11.939 %
10  :  13.6854 %
11  :  12.3488 %
12  :  13.2049 %
13  :  13.2732 %
14  :  12.3488 %
15  :  13.5146 %
16  :  14.3488 %
17  :  12.9171 %
18  :  13.7854 %
19  :  14.5341 %
20  :  13.7195 %
21  :  12.6561 %
22  :  14.5902 %
23  :  11.8293 %
24  :  13.3073 %
25  :  12.8927 %
26  :  14.3878 %
27  :  13.2415 %
28  :  15.0171 %
29  :  14.3756 %
30  :  13.8561 %
31  :  14.8366 %
32  :  13.7488 %
33  :  14.1439 %
34  :  15.8415 %
35  :  15.9561 %
36  :  14.1707 %
37  :  13.9683 %
38  :  13.6902 %
