# Dense neural network

In [19]:
import numpy as np
import pandas as pd

In [20]:
def activation_function(z, function):
    """
    Computes the activation function for the input.

    :param z: The input value or array.
    :type z: np.ndarray
    :param function: The type of activation function ('relu' or 'sigmoid').
    :type function: str
    :return: The result of the activation function applied to the input.
    :rtype: np.ndarray
    :raises ValueError: If ``function`` is neither 'relu' nor 'sigmoid'.
    """
    if function == 'relu':
        return np.maximum(0, z)
    if function == 'sigmoid':
        # Clip before exponentiating so np.exp cannot overflow for inputs
        # with a very large magnitude.
        clipped = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-clipped))
    # Name the rejected value so a typo in a layer definition is easy to spot.
    raise ValueError(f"Unsupported activation function: {function!r}")

def activation_function_derivative(z, function):
    """
    Computes the derivative of the activation function for the input.

    :param z: The input value or array (the pre-activation values).
    :type z: np.ndarray
    :param function: The type of activation function ('relu' or 'sigmoid').
    :type function: str
    :return: The derivative of the activation function evaluated at ``z``.
    :rtype: np.ndarray
    :raises ValueError: If ``function`` is neither 'relu' nor 'sigmoid'.
    """
    if function == 'relu':
        # 1 where the unit is active, 0 elsewhere (including exactly z == 0).
        return np.where(z > 0, 1, 0)
    if function == 'sigmoid':
        # Clip before exponentiating so np.exp cannot overflow.
        sig = 1 / (1 + np.exp(-np.clip(z, -500, 500)))
        return sig * (1 - sig)
    # Name the rejected value so a typo in a layer definition is easy to spot.
    raise ValueError(f"Unsupported activation function: {function!r}")

In [21]:
def normalize_data(X):
    """
    Normalizes the input data by subtracting the mean and dividing by the standard deviation.

    Both statistics are computed along axis 0. Where the standard deviation
    is exactly zero (all values along axis 0 are equal), the divisor is
    replaced by 1 so that the result is 0 instead of NaN.

    :param X: The input data.
    :type X: np.ndarray
    :return: The normalized data.
    :rtype: np.ndarray
    """
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    # A zero std would turn the division into 0/0 (NaN); X - mean is already
    # 0 in that case, so dividing by 1 gives a well-defined result.
    safe_std = np.where(std == 0, 1.0, std)

    return (X - mean) / safe_std

In [28]:
class Layer:
    """
    A fully connected layer: a weight matrix, a bias column and the name of
    the activation applied to the layer's output.
    """
    def __init__(self, activation_function, num_neurons, num_neurons_in_previous_layer):
        """
        Creates the layer with randomly drawn weights and an all-zero bias.

        Weights are standard-normal samples scaled by
        ``sqrt(2 / num_neurons_in_previous_layer)`` when the activation is
        'relu', and by 0.01 for any other activation name.

        :param activation_function: The activation function used in this layer.
        :type activation_function: str
        :param num_neurons: The number of neurons in this layer.
        :type num_neurons: int
        :param num_neurons_in_previous_layer: The number of neurons in the previous layer.
        :type num_neurons_in_previous_layer: int
        """
        # Pick the scale first so the random draw below is written only once.
        scale = (
            np.sqrt(2 / num_neurons_in_previous_layer)
            if activation_function == 'relu'
            else 0.01
        )
        # One row per neuron, one column per input coming from the previous layer.
        self.weights = np.random.randn(num_neurons, num_neurons_in_previous_layer) * scale
        self.bias = np.zeros((num_neurons, 1))
        self.activation_function = activation_function
        self.num_neurons = num_neurons
        self.num_neurons_in_previous_layer = num_neurons_in_previous_layer

    def forward(self, X):
        """
        Runs the layer on ``X``: ``weights @ X + bias`` followed by the layer's activation.

        :param X: The input data to the layer.
        :type X: np.ndarray
        :return: The output after applying the activation function.
        :rtype: np.ndarray
        """
        pre_activation = self.weights @ X + self.bias
        return activation_function(pre_activation, self.activation_function)

    def update_weights(self, weights, bias):
        """
        Replaces the layer's parameters with the given arrays.

        :param weights: The new weights.
        :type weights: np.ndarray
        :param bias: The new bias.
        :type bias: np.ndarray
        """
        self.weights, self.bias = weights, bias

class NeuralNetwork:
    """
    Represents a neural network composed of multiple layers.

    Every method uses a column-per-sample layout: inputs are indexed as
    (features, samples) and labels/predictions as (outputs, samples).
    """
    def __init__(self, layers):
        """
        Initializes the neural network with the provided layers.

        :param layers: A list of layers that form the neural network.
        :type layers: list of Layer
        """
        self.layers = layers
        self.num_layers = len(layers)

    def optimize(self, X, y, learning_rate, loss_function):
        """
        Performs one gradient-descent step on a batch: a forward pass, back-propagation, and a weight update.

        :param X: The input data, one column per sample.
        :type X: np.ndarray
        :param y: The true labels, one column per sample.
        :type y: np.ndarray
        :param learning_rate: The learning rate for gradient descent.
        :type learning_rate: float
        :param loss_function: The loss function to be minimized ('mse', 'cross_entropy', 'binary_cross_entropy').
        :type loss_function: str
        :raises ValueError: If ``loss_function`` is not one of the supported names.
        """
        if loss_function not in ('mse', 'cross_entropy', 'binary_cross_entropy'):
            raise ValueError("Unsupported loss function")

        # Forward pass, keeping every pre-activation (zs) and activation so
        # the backward pass can reuse them.
        A = X
        activations = [X]
        zs = []
        for layer in self.layers:
            Z = layer.weights @ A + layer.bias
            A = activation_function(Z, layer.activation_function)
            activations.append(A)
            zs.append(Z)

        # Error signal at the output pre-activation.
        # For the cross-entropy losses, (A - y) is the gradient w.r.t. Z when
        # the output layer is a sigmoid (assumed here; not checked).
        delta = activations[-1] - y
        if loss_function == 'mse' and zs:
            # For MSE the chain rule also needs the output activation's
            # derivative. The constant factor of the mean is left to the
            # learning rate.
            delta = delta * activation_function_derivative(
                zs[-1], self.layers[-1].activation_function)

        m = y.shape[1]  # number of samples in the batch
        for l in reversed(range(self.num_layers)):
            current_layer = self.layers[l]
            A_prev = activations[l]

            dW = (1/m) * delta @ A_prev.T
            db = (1/m) * np.sum(delta, axis=1, keepdims=True)

            if l > 0:
                # Propagate the error with the weights that produced this
                # forward pass, i.e. BEFORE this layer is updated.
                delta = (current_layer.weights.T @ delta) * activation_function_derivative(
                    zs[l-1], self.layers[l-1].activation_function)

            current_layer.update_weights(current_layer.weights - learning_rate * dW,
                                         current_layer.bias - learning_rate * db)

    def predict(self, X):
        """
        Predicts the output of the neural network for the given input data.

        :param X: The input data, one column per sample.
        :type X: np.ndarray
        :return: The predicted output (``X`` itself when the network has no layers).
        :rtype: np.ndarray
        """
        A = X
        for layer in self.layers:
            A = layer.forward(A)

        return A

    def compute_loss(self, y_true, y_pred, loss_function):
        """
        Computes the loss between the true labels and the predicted labels.

        :param y_true: The true labels, one column per sample.
        :type y_true: np.ndarray
        :param y_pred: The predicted labels, one column per sample.
        :type y_pred: np.ndarray
        :param loss_function: The loss function to be computed ('mse', 'cross_entropy', 'binary_cross_entropy').
        :type loss_function: str
        :return: The computed loss.
        :rtype: float
        :raises ValueError: If ``loss_function`` is not one of the supported names.
        """
        eps = 1e-10  # keeps log() away from log(0)
        if loss_function == 'mse':
            loss = np.mean(np.power(y_pred - y_true, 2))
        elif loss_function == 'binary_cross_entropy':
            loss = -np.mean(y_true * np.log(y_pred + eps) + (1 - y_true) * np.log(1 - y_pred + eps))
        elif loss_function == 'cross_entropy':
            # Sum over the class rows (axis 0) to get one value per sample,
            # then average over the samples.
            loss = -np.mean(np.sum(y_true * np.log(y_pred + eps), axis=0))
        else:
            raise ValueError("Unsupported loss function")

        return loss

    def compute_accuracy(self, y_true, y_pred):
        """
        Computes the accuracy between the true labels and the predicted labels.

        With several output rows the predicted/true class of a sample is the
        row index of the largest value in its column. With a single output
        row (binary case) values are thresholded at 0.5 instead, because an
        argmax over one row is always 0.

        :param y_true: The true labels, one column per sample.
        :type y_true: np.ndarray
        :param y_pred: The predicted labels, one column per sample.
        :type y_pred: np.ndarray
        :return: The fraction of samples whose predicted class matches the true class.
        :rtype: float
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        if y_true.ndim == 2 and y_true.shape[0] == 1:
            return np.mean((y_pred >= 0.5) == (y_true >= 0.5))

        y_pred_classes = np.argmax(y_pred, axis=0)
        y_true_classes = np.argmax(y_true, axis=0)
        return np.mean(y_pred_classes == y_true_classes)

    def fit(self, X, y, epochs, batch_size, learning_rate, loss, normalize=False):
        """
        Trains the neural network using the provided training data.

        Each epoch shuffles the samples, runs ``optimize`` on consecutive
        mini-batches, and prints accuracy and loss over the whole training set.

        :param X: The input feature matrix, one column per sample.
        :type X: np.ndarray
        :param y: The true labels, one column per sample.
        :type y: np.ndarray
        :param epochs: The number of training epochs.
        :type epochs: int
        :param batch_size: The size of each mini-batch (must be positive).
        :type batch_size: int
        :param learning_rate: The learning rate for gradient descent.
        :type learning_rate: float
        :param loss: The loss function to be minimized ('mse', 'cross_entropy', 'binary_cross_entropy').
        :type loss: str
        :param normalize: Whether to normalize the input data before training.
        :type normalize: bool
        :raises ValueError: If ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            # A negative step would make the batch loop silently empty.
            raise ValueError("batch_size must be a positive integer")

        if normalize:
            # NOTE(review): normalize_data reduces over axis 0, which in this
            # layout is the feature axis, and predict() applies no
            # normalization - confirm this is the intended behaviour.
            X = normalize_data(X)

        for epoch in range(epochs):
            # Shuffle samples (columns); indexing returns copies, so the
            # caller's arrays are left untouched.
            indexes = np.random.permutation(X.shape[1])
            X = X[:, indexes]
            y = y[:, indexes]

            for i in range(0, X.shape[1], batch_size):
                mini_batch_X = X[:, i:i + batch_size]
                mini_batch_y = y[:, i:i + batch_size]
                self.optimize(mini_batch_X, mini_batch_y, learning_rate, loss)

            y_pred = self.predict(X)
            accuracy = self.compute_accuracy(y, y_pred)
            loss_value = self.compute_loss(y, y_pred, loss)
            print(f'Epoch {epoch + 1}/{epochs} - accuracy: {accuracy:.4f} - loss: {loss_value:.4f}')



# Loading data

The dataset used for training is MNIST; it is loaded through `tensorflow.keras.datasets`.

In [23]:
from tensorflow.keras.datasets import mnist



# Fetch the MNIST train/test split
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Scale the pixel values by 1/255 as float32, flatten each 28x28 image into a
# 784-dimensional vector, and transpose so that every column is one sample
X_train = (X_train.astype(np.float32) / 255.0).reshape(len(X_train), -1).T
X_test = (X_test.astype(np.float32) / 255.0).reshape(len(X_test), -1).T

# One-hot encode the labels: row = class (10 classes), column = sample
y_train = np.eye(10)[y_train].T
y_test = np.eye(10)[y_test].T


# Creating the model and training

In [27]:
# Build a 784 -> 128 -> 64 -> 10 network: two ReLU hidden layers and a sigmoid output layer
network = NeuralNetwork([
    Layer(activation_function='relu', num_neurons=128, num_neurons_in_previous_layer=784),
    Layer(activation_function='relu', num_neurons=64, num_neurons_in_previous_layer=128),
    Layer(activation_function='sigmoid', num_neurons=10, num_neurons_in_previous_layer=64),
])

# Fit on the training split with mini-batch gradient descent
network.fit(
    X=X_train,
    y=y_train,
    epochs=40,
    batch_size=32,
    learning_rate=0.01,
    loss='cross_entropy',
    normalize=False,
)

# Score the held-out test split: the predicted/true class of a sample is the
# row index of the largest value in its column
y_pred = network.predict(X=X_test)
y_pred_classes = y_pred.argmax(axis=0)
y_true_classes = y_test.argmax(axis=0)

accuracy = (y_pred_classes == y_true_classes).mean()

Epoch 1/40 - accuracy: 0.8882 - loss: 0.0788
Epoch 2/40 - accuracy: 0.9185 - loss: 0.0564
Epoch 3/40 - accuracy: 0.9317 - loss: 0.0459
Epoch 4/40 - accuracy: 0.9425 - loss: 0.0397
Epoch 5/40 - accuracy: 0.9492 - loss: 0.0352
Epoch 6/40 - accuracy: 0.9548 - loss: 0.0314
Epoch 7/40 - accuracy: 0.9603 - loss: 0.0281
Epoch 8/40 - accuracy: 0.9641 - loss: 0.0259
Epoch 9/40 - accuracy: 0.9670 - loss: 0.0237
Epoch 10/40 - accuracy: 0.9704 - loss: 0.0221
Epoch 11/40 - accuracy: 0.9717 - loss: 0.0206
Epoch 12/40 - accuracy: 0.9739 - loss: 0.0194
Epoch 13/40 - accuracy: 0.9761 - loss: 0.0181
Epoch 14/40 - accuracy: 0.9777 - loss: 0.0173
Epoch 15/40 - accuracy: 0.9790 - loss: 0.0165
Epoch 16/40 - accuracy: 0.9808 - loss: 0.0155
Epoch 17/40 - accuracy: 0.9817 - loss: 0.0146
Epoch 18/40 - accuracy: 0.9830 - loss: 0.0138
Epoch 19/40 - accuracy: 0.9830 - loss: 0.0138
Epoch 20/40 - accuracy: 0.9846 - loss: 0.0127
Epoch 21/40 - accuracy: 0.9840 - loss: 0.0128
Epoch 22/40 - accuracy: 0.9864 - loss: 0.01

In [None]:
# Report `accuracy` (a fraction) as a percentage with two decimal places.
print(f'Accuracy for test set: {accuracy * 100:.2f}%')