In [1]:
import numpy as np
import pandas as pd

# Read the MNIST CSV into a NumPy array and shuffle its rows in place
data = np.array(pd.read_csv('MNIST.csv'))
np.random.shuffle(data)

# m rows (examples) by n columns; column 0 is treated as the label below
m, n = data.shape

# Hold out the first 1000 shuffled rows for testing.
# The transpose puts one example per column: row 0 = labels, rows 1..n-1 = features.
data_test = data[:1000].T
Y_test = data_test[0]
X_test = data_test[1:n] / 255    # scale features by 1/255

# Every remaining row is used for training, laid out the same way
data_train = data[1000:m].T
Y_train = data_train[0]
X_train = data_train[1:n] / 255  # scale features by 1/255

# Number of training examples (columns of X_train)
m_train = X_train.shape[1]

# ReLU activation function
def ReLU(Z):
    """
    Apply the Rectified Linear Unit element-wise.

    Args:
    Z (numpy.ndarray): Pre-activation values.

    Returns:
    numpy.ndarray: Array of the same shape as Z holding max(Z, 0) per element.
    """
    rectified = np.maximum(Z, 0)
    return rectified

# Define the softmax activation function
def softmax(Z):
    """
    Numerically stable softmax taken along axis 0.

    Each column of a 2-D input (one column per example) is turned into a
    probability distribution over its rows.

    Args:
    Z (numpy.ndarray): Input to the activation function.

    Returns:
    numpy.ndarray: Array of the same shape as Z whose entries along axis 0
    are non-negative and sum to 1.
    """
    Z = np.asarray(Z)
    if Z.size == 0:
        # Nothing to normalise; return an empty float array of the same shape.
        return np.exp(Z)

    # Softmax is invariant to adding a constant along the normalised axis.
    # Subtracting the maximum keeps every exponent <= 0, so np.exp cannot
    # overflow to inf (which would otherwise produce inf / inf = NaN).
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_Z = np.exp(shifted)
    return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)

# Class definition for the neural network
class Neural_Network:
    """
    Fully connected network with one ReLU hidden layer and a softmax output.

    Attributes:
    input_size, hidden_1_size, output_size (int): Layer sizes.
    fitness (float): Score assigned by an external fitness function; starts at 0.
    W1, b1 (numpy.ndarray): Hidden-layer weights and biases.
    W2, b2 (numpy.ndarray): Output-layer weights and biases.
    Z1, A1, Z2, A2 (numpy.ndarray): Intermediate values cached by forward_prop.
    """

    def __init__(self, input_size, hidden_1_size, output_size, bias=1):
        """
        Initialize a neural network with given input, hidden, and output layer sizes.

        Args:
        input_size (int): Number of input neurons.
        hidden_1_size (int): Number of neurons in the first hidden layer.
        output_size (int): Number of output neurons.
        bias (int): Number of columns in each bias array. The default of 1
            yields column vectors that broadcast across examples in forward_prop.
        """
        self.input_size = input_size
        self.hidden_1_size = hidden_1_size
        self.output_size = output_size
        self.fitness = 0

        # Initialize weights and biases uniformly in [-0.5, 0.5)
        self.W1 = np.random.rand(hidden_1_size, input_size) - 0.5
        self.b1 = np.random.rand(hidden_1_size, bias) - 0.5
        self.W2 = np.random.rand(output_size, hidden_1_size) - 0.5
        self.b2 = np.random.rand(output_size, bias) - 0.5

    # Method to create a copy of the neural network
    def copy(self):
        """
        Create an independent copy of the neural network.

        The copy owns its own parameter arrays, so modifying it in place does
        not affect the original. Layer sizes and the current fitness value are
        carried over so the copy's metadata matches its weights.

        Returns:
        Neural_Network: A copy of the neural network.
        """
        # Zero sizes make the constructor allocate only empty arrays; every
        # attribute it sets is overwritten below.
        new_nn = Neural_Network(0, 0, 0)
        new_nn.input_size = self.input_size
        new_nn.hidden_1_size = self.hidden_1_size
        new_nn.output_size = self.output_size
        new_nn.fitness = self.fitness
        new_nn.W1 = self.W1.copy()
        new_nn.b1 = self.b1.copy()
        new_nn.W2 = self.W2.copy()
        new_nn.b2 = self.b2.copy()
        return new_nn

    # Forward propagation method
    def forward_prop(self, X):
        """
        Perform forward propagation through the neural network.

        The intermediate results are stored on the instance as Z1, A1, Z2 and
        A2 so callers can read them after the call.

        Args:
        X (numpy.ndarray): Input data with one example per column, i.e. of
            shape (input_size, number_of_examples).

        Returns:
        numpy.ndarray: Output of the neural network (the softmax activations A2).
        """
        self.Z1 = self.W1.dot(X) + self.b1
        self.A1 = ReLU(self.Z1)
        self.Z2 = self.W2.dot(self.A1) + self.b2
        self.A2 = softmax(self.Z2)
        return self.A2
    
# Score a network by its classification accuracy
def fitness_function(network, X_train, Y_train):
    """
    Evaluate a network on labelled data and store the accuracy as its fitness.

    Args:
    network (Neural_Network): The neural network to evaluate.
    X_train (numpy.ndarray): Input data, passed to network.forward_prop.
    Y_train (numpy.ndarray): Target labels, compared against the arg-max of
        the network output along axis 0.

    Returns:
    float: Fraction of correct predictions; also written to network.fitness.
    """
    # The forward pass leaves the output activations in network.A2
    network.forward_prop(X_train)
    predicted_labels = np.argmax(network.A2, axis=0)

    is_correct = predicted_labels == Y_train
    accuracy = np.sum(is_correct) / Y_train.size

    network.fitness = accuracy
    return network.fitness

# Function to initialize the population of neural networks
def init_pop(pop_size, input_size=784, hidden_1_size=10, output_size=10):
    """
    Initialize a population of neural networks with random weights and biases.

    Args:
    pop_size (int): Size of the population.
    input_size (int): Number of input neurons of every network (default 784).
    hidden_1_size (int): Number of hidden neurons of every network (default 10).
    output_size (int): Number of output neurons of every network (default 10).

    Returns:
    list: List of initialized neural networks (empty when pop_size <= 0).
    """
    return [
        Neural_Network(input_size, hidden_1_size, output_size)
        for _ in range(pop_size)
    ]

# Function to select parents for crossover
def select_parents(population, X, y):
    """
    Select two distinct parents from the population based on their fitness scores.

    Selection is fitness-proportionate: the chance of picking a network is its
    fitness divided by the total fitness. When that distribution cannot be
    used to draw two distinct parents (total fitness is not positive, or fewer
    than two networks have positive fitness) selection falls back to uniform
    sampling instead of failing.

    The fitness values must already be stored on the networks; this function
    does not evaluate them.

    Args:
    population (list): List of neural networks in the population.
    X (numpy.ndarray): Input data. Unused; kept for interface compatibility.
    y (numpy.ndarray): Target labels. Unused; kept for interface compatibility.

    Returns:
    tuple: Copies of the two selected parent networks.

    Raises:
    ValueError: If the population holds fewer than two networks.
    """
    fitness_scores = [network.fitness for network in population]
    total_fitness = sum(fitness_scores)
    positive_count = sum(1 for score in fitness_scores if score > 0)

    if total_fitness > 0 and positive_count >= 2:
        probabilities = [score / total_fitness for score in fitness_scores]
    else:
        # Avoids 0 / 0 and the error np.random.choice raises when sampling
        # two items without replacement from fewer than two non-zero weights.
        probabilities = None  # None means uniform for np.random.choice

    selected_indices = np.random.choice(
        len(population), size=2, p=probabilities, replace=False
    )
    parent_1 = population[selected_indices[0]].copy()
    parent_2 = population[selected_indices[1]].copy()
    return parent_1, parent_2

# Function for crossover operation
def crossover(parent_1, parent_2):
    """
    Perform uniform crossover between two parent networks to generate a child network.

    Every entry of W1, b1, W2 and b2 is taken from parent_1 or parent_2 with
    equal probability, independently of all other entries. The parents are
    not modified.

    Args:
    parent_1 (Neural_Network): First parent network.
    parent_2 (Neural_Network): Second parent network.

    Returns:
    Neural_Network: Child network obtained from crossover, with its layer-size
    attributes set from the shapes of the inherited weights.
    """
    # Zero sizes make the constructor allocate only empty arrays; the
    # parameters and size attributes are all assigned below.
    child = Neural_Network(0, 0, 0)

    # Parameters are processed in a fixed order so the sequence of random
    # draws stays the same for a given seed.
    for name in ("W1", "b1", "W2", "b2"):
        gene_1 = getattr(parent_1, name)
        gene_2 = getattr(parent_2, name)
        mask = np.random.rand(*gene_1.shape) < 0.5  # True -> take from parent_1
        setattr(child, name, np.where(mask, gene_1, gene_2))

    # W1 is (hidden, input) and W2 is (output, hidden); record the sizes so
    # the child's metadata agrees with its weights instead of staying at 0.
    child.hidden_1_size, child.input_size = child.W1.shape
    child.output_size = child.W2.shape[0]

    return child

# Random perturbation of a network's parameters
def mutate(network, mutation_rate):
    """
    Add Gaussian noise to a random subset of the network's weights and biases.

    Each entry of W1, b1, W2 and b2 is selected independently with probability
    mutation_rate; selected entries receive standard-normal noise scaled by
    0.1. The network is modified in place.

    Args:
    network (Neural_Network): The neural network to mutate.
    mutation_rate (float): Probability that any single entry is perturbed.

    Returns:
    Neural_Network: The same network object, after mutation.
    """
    for attr in ("W1", "b1", "W2", "b2"):
        values = getattr(network, attr)
        # Draw the selection mask first, then the noise, for every parameter
        selected = np.random.rand(*values.shape) < mutation_rate
        values += selected * np.random.randn(*values.shape) * 0.1
        setattr(network, attr, values)

    return network



In [2]:
# Hyperparameters of the genetic algorithm
pop_size = 8          # number of networks created by init_pop
mutation_rate = 0.05  # rate passed to mutate() for each child
generations = 150     # number of iterations of the evolution loop

# Initialize the population of neural networks
population = init_pop(pop_size)
# Seed the best-so-far tracker with the first network. Nothing has been
# evaluated yet, so this is only a placeholder (fitness forced to 0), not the
# actual best of the initial population.
best_ever_network = population[0]
best_ever_network.fitness = 0


In [3]:
for generation in range(generations):

    # Evaluate every network on the training set (fitness_function stores the
    # score on each network) and pick the fittest one of this generation
    best_network = max(population, key=lambda x: fitness_function(x, X_train, Y_train))

    # Update the best_ever_network if the current best_network is better
    if best_network.fitness > best_ever_network.fitness:
        best_ever_network = best_network

        # Progress is reported only when a new best network is found
        print(generation, ' : ', round(best_network.fitness * 100, 4), '%')

    # Create a new population by selecting parents, performing crossover, and mutation
    new_population = []

    for _ in range(pop_size // 2):
        # Select two parents from the current population
        parent_1, parent_2 = select_parents(population, X_train, Y_train)

        # Each pair of parents produces two independently mixed and mutated children
        child_1 = mutate(crossover(parent_1, parent_2), mutation_rate)
        child_2 = mutate(crossover(parent_1, parent_2), mutation_rate)

        # Add the children to the new population
        new_population.extend([child_1, child_2])

    # Replace the old population with the new one and carry the best network
    # seen so far over unchanged, so the next generation holds the children
    # plus that one extra network
    population = new_population
    population.append(best_ever_network)

# No comparison is needed after the loop: the update inside the loop already
# guarantees best_network.fitness <= best_ever_network.fitness, and nothing is
# re-evaluated afterwards.

# Evaluate the best network on the test set and print its accuracy
# (fitness_function overwrites best_ever_network.fitness with the test score)
print('Test accuracy:', round(fitness_function(best_ever_network, X_test, Y_test) * 100, 4), '%')

1  :  13.7683 %
2  :  15.5829 %
4  :  15.8049 %
5  :  16.1707 %
6  :  17.261 %
9  :  18.1463 %
12  :  18.4122 %
13  :  19.0634 %
15  :  22.2146 %
30  :  22.7512 %
32  :  23.6317 %
35  :  25.0049 %
36  :  27.9439 %
38  :  28.5878 %
52  :  28.7537 %
53  :  29.8293 %
66  :  30.2561 %
68  :  30.9195 %
69  :  31.1707 %
75  :  31.2098 %
79  :  31.9 %
84  :  32.3878 %
85  :  32.8122 %
97  :  32.9927 %
98  :  33.078 %
102  :  33.1268 %
103  :  33.1854 %
104  :  33.6463 %
107  :  34.2512 %
113  :  34.4 %
119  :  34.7659 %
124  :  34.9171 %
126  :  35.0561 %
130  :  35.6146 %
133  :  36.3756 %
Test accuracy: 38.2 %
