In [1]:
# Uncomment to install the UCI dataset client into the active kernel's environment:
# %pip install ucimlrepo



In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ucimlrepo import fetch_ucirepo

# Implementation 

In [None]:
import numpy as np
from initialization_builder import InitializationBuilder
from activation_builder import ActivationBuilder


class Layer:
    """One dense layer: its parameters, their gradients and the last forward pass.

    Attributes:
        nodes_in: number of inputs the layer was declared with.
        nodes_out: number of outputs the layer was declared with.
        weights / biases: parameters obtained from ``InitializationBuilder``,
            requested with dimensions ``(nodes_out, nodes_in)`` and
            ``(nodes_out, 1)`` respectively.
        weights_gradient / biases_gradient: gradient buffers requested with the
            same dimensions using the ``"zero"`` initialization.
        activation: object returned by ``ActivationBuilder.get_activation``;
            ``forward`` calls its ``activation`` method.
        z: value of ``np.dot(weights, a) + biases`` from the last ``forward`` call.
        a: activation output from the last ``forward`` call.

    ``z`` and ``a`` are not assigned in ``__init__``; they exist only after
    ``forward`` has been called at least once.
    """

    __slots__ = [
        "nodes_in",
        "nodes_out",
        "weights",
        "weights_gradient",
        "biases",
        "biases_gradient",
        "activation",
        "a",
        "z",
    ]

    def __init__(
        self,
        nodes_in,
        nodes_out,
        activation="sigmoid",
        weight_initialization="he",
        bias_initialization="zero",
    ):
        """Create the parameters, zeroed gradient buffers and the activation.

        Args:
            nodes_in: number of inputs of the layer.
            nodes_out: number of outputs of the layer.
            activation: name handed to ``ActivationBuilder.get_activation``.
            weight_initialization: scheme name used for the weights.
            bias_initialization: scheme name used for the biases.
        """
        self.nodes_in, self.nodes_out = nodes_in, nodes_out

        build = InitializationBuilder.get_initialization

        # Parameters and their gradient buffers are requested pairwise so that
        # each buffer is asked for with the same dimensions as its parameter.
        self.weights = build(weight_initialization, nodes_out, nodes_in)
        self.weights_gradient = build("zero", nodes_out, nodes_in)

        self.biases = build(bias_initialization, nodes_out, 1)
        self.biases_gradient = build("zero", nodes_out, 1)

        self.activation = ActivationBuilder.get_activation(activation)

    def forward(self, a):
        """Run the layer on ``a`` and cache both intermediate results.

        Stores ``np.dot(weights, a) + biases`` in ``self.z`` and the activated
        value in ``self.a``.

        Returns:
            The activated output (the same object that is stored in ``self.a``).
        """
        pre_activation = np.dot(self.weights, a) + self.biases
        self.z = pre_activation
        self.a = self.activation.activation(pre_activation)
        return self.a

    def report_layer(self, layer_num):
        """Return a multi-line text dump of the layer's weights and biases.

        Args:
            layer_num: number shown in the first line of the report.
        """
        summary = (
            f"Layer number {layer_num}\nWeights\n{self.weights}\nbiases\n{self.biases}"
        )
        return summary

In [None]:
import numpy as np
import pandas as pd
from layer_v2 import Layer
from optimizers_builder import OptimizersBuilder
from cost_function_builder import CostFunctionBuilder
from helper_functions import HelperFunction


class NeuralNetwork:
    """Feed-forward neural network assembled from a sequence of ``Layer`` objects.

    Public methods take data with one sample per row. Internally activations
    are kept with one sample per column: ``_forward`` transposes its input and
    ``predict`` transposes the result back.

    Attributes:
        layers: the layers in the order they are applied.
        layer_sizes: ``[nodes_in of first layer, nodes_out of each layer...]``.
        optimizer: callable returned by ``OptimizersBuilder().build_optimizer``;
            ``train`` / ``train_with_early_stopping`` call it with keyword
            arguments.
        cost_function: object returned by
            ``CostFunctionBuilder().build_cost_function``; its ``cost`` and
            ``cost_derivative`` methods are used here.
        regularization: ``None``, ``"l1"`` or ``"l2"``.
        C: regularization strength.
    """

    __slots__ = [
        "layers",
        "optimizer",
        "cost_function",
        "layer_sizes",
        "regularization",
        "C",
    ]

    def __init__(
        self, optimizer="adam", cost_function="mse", regularization=None, C=0.01
    ):
        """Create an empty network.

        Args:
            optimizer: name passed to ``OptimizersBuilder().build_optimizer``.
            cost_function: name passed to
                ``CostFunctionBuilder().build_cost_function``.
            regularization: ``None``, ``"l1"`` or ``"l2"``.
            C: strength of the regularization term.

        Raises:
            AssertionError: if ``regularization`` is not one of the supported
                values.
        """
        self.layers = []
        self.layer_sizes = []
        self.optimizer = OptimizersBuilder().build_optimizer(optimizer)
        self.cost_function = CostFunctionBuilder().build_cost_function(cost_function)
        assert regularization in [None, "l1", "l2"], "Regularization not supported"
        self.regularization = regularization
        self.C = C  # strength of regularization

    def add_layer(self, layer: "Layer"):
        """Append ``layer`` to the network.

        Raises:
            AssertionError: if ``layer.nodes_in`` differs from the output size
                of the previously added layer.
        """
        if not self.layer_sizes:
            self.layer_sizes = [layer.nodes_in]
        else:
            assert (
                layer.nodes_in == self.layer_sizes[-1]
            ), "Output in previous layer doesn't match input in this layer"
        self.layer_sizes.append(layer.nodes_out)
        self.layers.append(layer)

    @staticmethod
    def _to_numpy(data):
        """Turn pandas input into a NumPy array; anything else passes through.

        A one-dimensional pandas object (a ``Series``) becomes a single column.
        """
        if isinstance(data, (pd.DataFrame, pd.Series)):
            data = data.to_numpy()
            if len(data.shape) == 1:
                data = data.reshape(-1, 1)
        return data

    def _forward(self, x: np.ndarray):
        """Propagate ``x`` (one sample per row) through every layer.

        Returns:
            The output of the last layer, with one sample per column.
        """
        a = self._to_numpy(x).T
        for layer in self.layers:
            a = layer.forward(a)
        return a

    def predict(self, x: np.ndarray):
        """Return the network output for ``x`` with one sample per row."""
        return self._forward(x).T

    def predict_class(self, x: np.ndarray):
        """Return, per sample, the index of the largest output as a column."""
        return np.argmax(self.predict(x), axis=1, keepdims=True)

    def __str__(self) -> str:
        """Return the reports of all layers, numbered from 1, one after another."""
        return "\n".join(
            layer.report_layer(i) for i, layer in enumerate(self.layers, 1)
        )

    def flatten_weights_and_biases(self):
        """Return all parameters as one 1-D array.

        The order is weights then biases of the first layer, then of the
        second layer, and so on.
        """
        weights_and_biases = []
        for layer in self.layers:
            weights_and_biases.append(layer.weights.flatten())
            weights_and_biases.append(layer.biases.flatten())
        return np.concatenate(weights_and_biases)

    def deflatten_weights_and_biases(self, solution):
        """Load parameters from a flat vector laid out as in
        ``flatten_weights_and_biases``.

        The layer parameters are created by slicing and reshaping ``solution``,
        so they may share memory with it.
        """
        offset = 0
        for layer in self.layers:
            for name in ("weights", "biases"):
                current = getattr(layer, name)
                chunk = solution[offset : offset + current.size]
                setattr(layer, name, chunk.reshape(current.shape))
                offset += current.size

    def flatted_gradient(self):
        """Return all stored gradients as one 1-D array, in the same order as
        ``flatten_weights_and_biases``."""
        gradients = []
        for layer in self.layers:
            gradients.append(layer.weights_gradient.flatten())
            gradients.append(layer.biases_gradient.flatten())
        return np.concatenate(gradients)

    def calculate_gradient_numerically(self, x: np.ndarray, y: np.ndarray, h=1e-5):
        """
        THIS FUNCTION IS ONLY FOR EXPERIMENTAL PURPOSES
        It calculates the gradient numerically (forward differences with step
        ``h``) and writes it into the layers' gradient buffers.
        It is too slow to be used in practice.

        Only ``cost_function.cost`` is differentiated; the regularization term
        of ``calculate_cost`` is not part of this gradient.
        """
        initial_cost = self.cost_function.cost(self.predict(x), y)
        for layer in self.layers:
            for parameters, gradient in (
                (layer.weights, layer.weights_gradient),
                (layer.biases, layer.biases_gradient),
            ):
                for index in np.ndindex(parameters.shape):
                    # Remember the exact value: ``+= h`` followed by ``-= h``
                    # does not always give the original float back, and the
                    # parameters may be views into the caller's solution vector.
                    original_value = parameters[index]
                    parameters[index] = original_value + h
                    new_cost = self.cost_function.cost(self.predict(x), y)
                    parameters[index] = original_value

                    gradient[index] = (new_cost - initial_cost) / h

    def _regularization_gradient(self, weights, n_samples):
        """Return the regularization term added to a weight gradient, or
        ``None`` when no regularization is configured.

        NOTE(review): this term is divided by the number of samples while the
        penalty in ``calculate_cost`` is not - confirm which scaling is intended.
        """
        if self.regularization == "l1":
            return self.C * np.sign(weights) / n_samples
        if self.regularization == "l2":
            return 2 * self.C * weights / n_samples
        return None

    def backpropagation(self, x: np.ndarray, y: np.ndarray):
        """Compute gradients for every layer and store them on the layers.

        Works for any number of layers, including a single one. ``x`` and ``y``
        hold one sample per row; pandas objects are converted to NumPy arrays
        first.

        For each layer this sets ``biases_gradient`` to the mean of the layer's
        delta over the samples and ``weights_gradient`` to
        ``delta @ layer_input / n_samples`` plus the regularization term.
        """
        x = self._to_numpy(x)
        y = self._to_numpy(y)
        n_samples = x.shape[0]

        output = self._forward(x)
        last_index = len(self.layers) - 1
        output_layer = self.layers[last_index]
        delta = self.cost_function.cost_derivative(
            output, y.T
        ) * output_layer.activation.derivative(output_layer.z)

        # Walk from the output layer back to the first layer.
        for index in range(last_index, -1, -1):
            layer = self.layers[index]
            if index < last_index:
                # Push the error of the following layer back through its weights.
                delta = np.dot(
                    self.layers[index + 1].weights.T, delta
                ) * layer.activation.derivative(layer.z)

            # The first layer is fed by the data itself (samples in rows);
            # later layers are fed by the cached activation of their predecessor.
            layer_input = x if index == 0 else self.layers[index - 1].a.T

            layer.biases_gradient = np.mean(delta, axis=1, keepdims=True)
            layer.weights_gradient = np.dot(delta, layer_input) / n_samples

            penalty = self._regularization_gradient(layer.weights, n_samples)
            if penalty is not None:
                layer.weights_gradient = layer.weights_gradient + penalty

    def calculate_and_extract_gradient(
        self,
        x: np.ndarray,
        y: np.ndarray,
        current_solution: np.ndarray,
        use_backpropagation=True,
    ):
        """Load ``current_solution`` into the layers and return the flat gradient.

        Args:
            x, y: data used to compute the gradient.
            current_solution: flat parameter vector (see
                ``flatten_weights_and_biases``).
            use_backpropagation: use ``backpropagation`` when true, otherwise
                ``calculate_gradient_numerically``.
        """
        self.deflatten_weights_and_biases(current_solution)
        if use_backpropagation:
            self.backpropagation(x, y)
        else:
            self.calculate_gradient_numerically(x, y)
        return self.flatted_gradient()

    def train(
        self,
        X,
        y,
        learning_rate=0.01,
        max_num_epoch=1000,
        batch_size=30,
        batch_fraction=None,
        using_backpropagation=True,
        silent=True,
    ):
        """Run the configured optimizer on ``(X, y)``.

        All arguments are forwarded to ``self.optimizer`` as keyword arguments,
        together with ``neural_network=self``.

        Returns:
            Whatever the optimizer returns.
        """
        mse_after_epoch_train = self.optimizer(
            X=X,
            y=y,
            using_backpropagation=using_backpropagation,
            learning_rate=learning_rate,
            max_num_epoch=max_num_epoch,
            batch_size=batch_size,
            batch_fraction=batch_fraction,
            neural_network=self,
            silent=silent,
        )
        return mse_after_epoch_train

    def train_with_early_stopping(
        self,
        X,
        y,
        X_test,
        y_test,
        learning_rate=0.01,
        max_num_epoch=1000,
        batch_size=30,
        batch_fraction=None,
        using_backpropagation=True,
        silent=True,
    ):
        """Run the configured optimizer with an additional test set.

        Same as ``train`` but also forwards ``X_test`` and ``y_test``; the
        optimizer is expected to return a pair, which is returned unchanged.
        """
        mse_after_epoch_train, mse_after_epoch_test = self.optimizer(
            X=X,
            y=y,
            X_test=X_test,
            y_test=y_test,
            using_backpropagation=using_backpropagation,
            learning_rate=learning_rate,
            max_num_epoch=max_num_epoch,
            batch_size=batch_size,
            batch_fraction=batch_fraction,
            neural_network=self,
            silent=silent,
        )
        return mse_after_epoch_train, mse_after_epoch_test

    def calculate_cost(self, x: np.ndarray, y: np.ndarray):
        """Return the cost on ``(x, y)`` including the regularization penalty.

        The penalty is ``C`` times the sum of absolute (``"l1"``) or squared
        (``"l2"``) weights of all layers; biases are not penalized.
        """
        base_cost = self.cost_function.cost(self.predict(x), y)
        if self.regularization == "l1":
            penalty = sum(np.sum(np.abs(layer.weights)) for layer in self.layers)
        elif self.regularization == "l2":
            penalty = sum(np.sum(layer.weights**2) for layer in self.layers)
        else:
            return base_cost
        return base_cost + self.C * penalty

    def visualize_network(self):
        """Delegate drawing of the network to ``HelperFunction.visualize_network``."""
        HelperFunction.visualize_network(self)

In [None]:
class Evolutionary_Algorithm:
    """Simple evolutionary minimizer of a real-valued function.

    Each generation enlarges the population with children (one-point
    crossover) and mutants (Gaussian noise), evaluates everything, keeps the
    best individuals unchanged and fills the rest of the next population by
    fitness-weighted random sampling.

    Attributes:
        mutation_rate: probability that ``_mutation`` perturbs an individual.
        crossover_rate: probability that ``_crossover`` recombines a pair.
        number_of_generations: number of iterations of the main loop.
        population_size: number of individuals kept between generations.
        function_to_optimize: objective, called as ``f(*individual)``.
        population: 2-D array with one individual per row.
        problem_dim: number of genes per individual.
        hall_of_fame: list with, for every evaluated generation, the indices of
            the best individuals of that generation's enlarged population.
    """

    __slots__ = [
        "mutation_rate",
        "crossover_rate",
        "number_of_generations",
        "population_size",
        "function_to_optimize",
        "population",
        "problem_dim",
        "hall_of_fame",
    ]

    def __init__(
        self,
        mutation_rate=0.7,
        crossover_rate=0.7,
        number_of_generations=50,
        population_size=100,
    ):
        """Store the hyper-parameters; the population is created by ``optimize``."""
        self.population_size = population_size
        self.number_of_generations = number_of_generations
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate
        self.function_to_optimize = None
        self.population = None
        self.problem_dim = None
        self.hall_of_fame = []

    def _generate_population(self, n_genes):
        """Fill ``self.population`` with uniform samples from [-10, 10)."""
        self.population = np.random.uniform(-10, 10, (self.population_size, n_genes))

    def _mutation(self, individual):
        """Return a copy of ``individual``, perturbed with probability
        ``mutation_rate``.

        When the mutation fires, independent N(0, 1) noise is added to every
        gene. The argument itself is never modified.
        """
        mutated = np.array(individual, dtype=float)
        if np.random.rand() < self.mutation_rate:
            mutated += np.random.normal(0, 1, mutated.shape)
        return mutated

    def _crossover(self, parent1, parent2):
        """One-point crossover of two parents.

        With probability ``crossover_rate`` the parents swap their tails at a
        random cut point in ``1 .. n_genes - 1``; otherwise, or when there are
        fewer than two genes, the parents are returned as they are.
        """
        n_genes = len(parent1)
        if n_genes >= 2 and np.random.rand() < self.crossover_rate:
            # randint's upper bound is exclusive, so this covers every cut
            # that leaves at least one gene on each side.
            crossover_point = np.random.randint(1, n_genes)
            child1 = np.concatenate(
                (parent1[:crossover_point], parent2[crossover_point:])
            )
            child2 = np.concatenate(
                (parent2[:crossover_point], parent1[crossover_point:])
            )
            return child1, child2
        return parent1, parent2

    def _evaluate_population(self):
        """Evaluate every individual and record the elite of this generation.

        Appends to ``hall_of_fame`` the indices of the best 10% of
        ``population_size`` individuals (at least one).

        Returns:
            1-D array with the objective value of every row of the population.
        """
        fitness = np.array(
            [self.function_to_optimize(*individual) for individual in self.population],
            dtype=float,
        )
        n_elite = max(1, int(self.population_size * 0.1))
        self.hall_of_fame.append(np.argsort(fitness)[:n_elite])
        return fitness

    def _tournament_selection(self, fitness):
        """Sample the non-elite part of the next population.

        Despite the name this is fitness-weighted random sampling with
        replacement. When every fitness value is positive, an individual's
        weight is ``1 / fitness``. Otherwise that formula would produce invalid
        probabilities, so the weight is the distance to the worst value
        (uniform if all values are equal).
        """
        fitness = np.asarray(fitness, dtype=float)
        if np.all(fitness > 0):
            weights = 1 / fitness
        else:
            weights = np.max(fitness) - fitness
            if not np.any(weights > 0):
                weights = np.ones_like(fitness)
        probabilities = weights / np.sum(weights)
        chosen = np.random.choice(
            len(self.population),
            p=probabilities,
            size=self.population_size - len(self.hall_of_fame[-1]),
            replace=True,
        )
        return self.population[chosen]

    def _select_individual(self, n=1):
        """Return ``n`` distinct random rows among the first
        ``population_size`` rows of the population (shape ``(n, n_genes)``)."""
        return self.population[
            np.random.choice(range(self.population_size), size=n, replace=False)
        ]

    def _visualize_individual(self, individual):
        """Print ``individual``."""
        print(individual)

    def optimize(self, function_to_optimize, silent=False, problem_dim=None):
        """Minimize ``function_to_optimize``.

        Args:
            function_to_optimize: callable invoked as ``f(*individual)``.
            silent: when false, print the best value of every generation.
            problem_dim: number of genes. When ``None`` it is read from
                ``function_to_optimize.__code__.co_argcount``; pass it
                explicitly for callables where that is unavailable or wrong
                (``*args`` functions, partials, bound methods).

        Returns:
            ``(best_evaluation_in_iteration, best_solution_in_iteration)``:
            per generation, the best objective value and the individual
            reaching it.

        Raises:
            ValueError: if ``population_size`` is below 2 or the problem
                dimension is below 1.
        """
        if self.population_size < 2:
            raise ValueError("population_size must be at least 2")

        # initialize the population
        self.function_to_optimize = function_to_optimize
        if problem_dim is None:
            problem_dim = function_to_optimize.__code__.co_argcount
        if problem_dim < 1:
            raise ValueError("problem_dim must be at least 1")
        self.problem_dim = problem_dim
        self._generate_population(self.problem_dim)
        best_evaluation_in_iteration = []
        best_solution_in_iteration = []

        # main algorithm loop
        for generation in range(self.number_of_generations):

            # crossover: children are stored pairwise, hence the even row count
            children = np.zeros(
                (2 * len(range(0, self.population_size, 2)), self.problem_dim)
            )
            for i in range(0, self.population_size, 2):
                parent1, parent2 = self._select_individual(n=2)
                children[i], children[i + 1] = self._crossover(parent1, parent2)

            # mutation: every mutant starts from a randomly drawn parent
            mutated = np.zeros((self.population_size, self.problem_dim))
            for i in range(self.population_size):
                mutated[i] = self._mutation(self._select_individual()[0])

            # parents stay in the first rows, so the elite can only improve
            self.population = np.vstack([self.population, children, mutated])

            # evaluate the population and log the best solution
            fitness = self._evaluate_population()
            if not silent:
                print(
                    f"iter: {generation}, best: {np.min(fitness)} for {['%.3f' % n for n in self.population[self.hall_of_fame[-1][0]]]}"
                )

            best_evaluation_in_iteration.append(np.min(fitness))
            best_solution_in_iteration.append(
                self.population[self.hall_of_fame[-1][0]].copy()
            )

            # create new population: elite first, the rest by weighted sampling
            self.population = np.vstack(
                [
                    self.population[self.hall_of_fame[-1]],
                    self._tournament_selection(fitness),
                ]
            )

        return best_evaluation_in_iteration, best_solution_in_iteration

# Testing methods 

### MPG

In [5]:
# Download dataset id 9 (bound to `auto_mpg`) from the UCI repository.
auto_mpg = fetch_ucirepo(id=9)

# Features and targets of the fetched data (pandas dataframes).
X, y = auto_mpg.data.features, auto_mpg.data.targets

### Iris

In [None]:
# Download dataset id 53 (bound to `iris`) from the UCI repository.
iris = fetch_ucirepo(id=53)

# Features and targets of the fetched data (pandas dataframes).
# Note: this rebinds the `X` and `y` names used by the MPG cell above.
X, y = iris.data.features, iris.data.targets

### Multimodal large

In [None]:
# Read the training split of the "multimodal-large" regression data
# straight from the course repository on GitHub (requires network access).
multimodal_large_train = pd.read_csv(
    "https://raw.githubusercontent.com/nizwant/miowid/main/data/regression/multimodal-large-training.csv"
)
# Matching test split from the same repository directory.
multimodal_large_test = pd.read_csv(
    "https://raw.githubusercontent.com/nizwant/miowid/main/data/regression/multimodal-large-test.csv",
)

In [None]:
# Column statistics come from the training split only; the test split is
# scaled with the same values so that both splits share one scale.
mean, std = multimodal_large_train.mean(), multimodal_large_train.std()

multimodal_large_train = multimodal_large_train.sub(mean).div(std)
multimodal_large_test = multimodal_large_test.sub(mean).div(std)