In [65]:
import numpy as np

### **Common Activation Functions and their Derivatives**

In [66]:
# Activation functions
def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    rectified = np.maximum(0, Z)
    return rectified

def sigmoid(Z):
    """Logistic sigmoid ``1 / (1 + exp(-Z))``, evaluated without overflow.

    The naive formula computes ``exp(-Z)``, which overflows to ``inf`` (and
    emits a RuntimeWarning) for large negative ``Z``.  Using the identity
    ``sigmoid(Z) = exp(-log(1 + exp(-Z)))`` with ``np.logaddexp`` gives the
    same values while staying finite over the whole input range.

    Args:
        Z: scalar or array of pre-activations.

    Returns:
        Element-wise sigmoid of ``Z``, with the same shape as ``Z``.
    """
    # logaddexp(0, -Z) == log(exp(0) + exp(-Z)) == log(1 + exp(-Z))
    return np.exp(-np.logaddexp(0, -Z))

def tanh(Z):
    """Hyperbolic tangent activation, applied element-wise via ``np.tanh``."""
    squashed = np.tanh(Z)
    return squashed

def softmax(Z):
    """Softmax along axis 0 (each column is normalised independently).

    The maximum is subtracted per column before exponentiating.  Subtracting
    the single global maximum (as was done previously) lets a column whose
    values are far below that maximum underflow to all zeros, which then
    produces ``0 / 0 = NaN`` in the normalisation.

    Args:
        Z: array of pre-activations; axis 0 indexes the classes.

    Returns:
        Array of the same shape as ``Z`` whose entries along axis 0 sum to 1.
    """
    # Shifting by the per-column max leaves the result unchanged but guarantees
    # that every column contains exp(0) == 1, so the denominator is never 0.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    expZ = np.exp(shifted)
    return expZ / expZ.sum(axis=0, keepdims=True)

# Derivatives
def relu_derivative(Z):
    """Derivative of ReLU as a boolean mask: True where ``Z`` is strictly positive."""
    positive_mask = Z > 0
    return positive_mask

def sigmoid_derivative(Z):
    """Derivative of the sigmoid at ``Z``: ``s * (1 - s)`` with ``s = sigmoid(Z)``."""
    s = sigmoid(Z)
    return s * (1 - s)

def tanh_derivative(Z):
    """Derivative of tanh at ``Z``: ``1 - tanh(Z) ** 2``."""
    t = tanh(Z)
    return 1 - t ** 2

In [67]:
# Lookup table: activation name -> forward function.
activation_functions = dict(
    relu=relu,
    sigmoid=sigmoid,
    tanh=tanh,
    softmax=softmax,
)

# Lookup table: activation name -> derivative function.
# There is no entry for 'softmax'.
activation_derivatives = dict(
    relu=relu_derivative,
    sigmoid=sigmoid_derivative,
    tanh=tanh_derivative,
)

### **Common Loss Functions and their Derivatives**

In [68]:
# Loss functions
def mse_loss(AL, Y):
    """Mean squared error: the mean over all elements of ``(AL - Y) ** 2``."""
    residual = AL - Y
    return np.mean(residual ** 2)

def binary_cross_entropy(AL, Y, eps=1e-12):
    """Binary cross-entropy averaged over all elements.

    Predictions are clipped to ``[eps, 1 - eps]`` before taking logarithms.
    Without the clip, a prediction of exactly 0 or 1 produces ``log(0) = -inf``
    and the product ``0 * -inf`` turns the whole loss into NaN.

    Args:
        AL: predicted probabilities.
        Y: targets, broadcastable against ``AL``.
        eps: clipping margin that keeps the logarithms finite.

    Returns:
        Scalar loss ``-mean(Y * log(AL) + (1 - Y) * log(1 - AL))``.
    """
    clipped = np.clip(AL, eps, 1 - eps)
    return -np.mean(Y * np.log(clipped) + (1 - Y) * np.log(1 - clipped))

def categorical_cross_entropy(AL, Y, eps=1e-12):
    """Categorical cross-entropy: sum over axis 0, then mean over the rest.

    Predictions are floored at ``eps`` before the logarithm.  Without the
    floor, any predicted probability of exactly 0 gives ``log(0) = -inf`` and,
    where the matching target is 0, ``0 * -inf = NaN`` for the whole loss.

    Args:
        AL: predicted probabilities; axis 0 is summed over.
        Y: targets with the same shape as ``AL``.
        eps: lower bound applied to ``AL`` to keep the logarithm finite.

    Returns:
        Scalar loss ``-mean(sum(Y * log(AL), axis=0))``.
    """
    safe_AL = np.clip(AL, eps, None)
    return -np.mean(np.sum(Y * np.log(safe_AL), axis=0))

# Derivatives 
def mse_loss_derivative(AL, Y):
    """Return the residual ``AL - Y``.

    No factor of 2 and no averaging are applied here.
    """
    residual = AL - Y
    return residual

def binary_cross_entropy_derivative(AL, Y, eps=1e-12):
    """Element-wise ``(AL - Y) / (AL * (1 - AL))`` with a guarded denominator.

    When a prediction is exactly 0 or 1 the denominator ``AL * (1 - AL)`` is
    0, which previously produced NaN (0 / 0) or inf.  The denominator is now
    floored at ``eps`` so the result stays finite; values strictly inside
    (0, 1) whose product exceeds ``eps`` are unaffected.

    Args:
        AL: predicted probabilities.
        Y: targets, broadcastable against ``AL``.
        eps: smallest value the denominator is allowed to take.

    Returns:
        Array with the broadcast shape of ``AL`` and ``Y``.
    """
    denominator = np.maximum(AL * (1 - AL), eps)
    return (AL - Y) / denominator

def categorical_cross_entropy_derivative(AL, Y):
    """Return ``AL - Y``, the difference between predictions and targets."""
    difference = AL - Y
    return difference

In [69]:
# Lookup table: loss name -> function computing the scalar loss.
loss_functions = dict(
    mse=mse_loss,
    binary_cross_entropy=binary_cross_entropy,
    categorical_cross_entropy=categorical_cross_entropy,
)

# Lookup table: loss name -> function computing the loss derivative.
loss_derivatives = dict(
    mse=mse_loss_derivative,
    binary_cross_entropy=binary_cross_entropy_derivative,
    categorical_cross_entropy=categorical_cross_entropy_derivative,
)

### **Common Evaluation Metrics**

In [93]:
def accuracy(predictions, labels):
    """Fraction of positions where ``predictions`` equals ``labels``."""
    matches = predictions == labels
    return np.mean(matches)

def precision(predictions, labels):
    """Share of predicted positives (value 1) whose label is also 1.

    Returns 0 when nothing was predicted positive.
    """
    flagged = predictions == 1
    hits = np.sum(flagged & (labels == 1))
    flagged_total = np.sum(flagged)
    if flagged_total > 0:
        return hits / flagged_total
    return 0

def recall(predictions, labels):
    """Share of actual positives (label 1) that were predicted as 1.

    Returns 0 when there are no positive labels.
    """
    is_positive = labels == 1
    hits = np.sum((predictions == 1) & is_positive)
    positive_total = np.sum(is_positive)
    if positive_total > 0:
        return hits / positive_total
    return 0

In [94]:
# Lookup table: metric name -> scoring function taking (predictions, labels).
eval_metric = dict(
    accuracy=accuracy,
    precision=precision,
    recall=recall,
)

In [122]:
class SimpleMLPNN:
    """Fully connected feed-forward network trained by mini-batch gradient descent.

    Samples are stored column-wise: inputs have shape ``(n_features, n_samples)``
    and layer ``i`` holds a weight matrix ``W{i}`` of shape
    ``(layer_sizes[i], layer_sizes[i-1])`` plus a bias column ``b{i}``.
    Activations, losses and metrics are looked up by name in the module-level
    tables ``activation_functions``, ``activation_derivatives``,
    ``loss_functions``, ``loss_derivatives`` and ``eval_metric``.
    """

    def __init__(self, layer_sizes, activations, loss_function, learning_rate=0.01):
        """Store the configuration and create the initial parameters.

        Args:
            layer_sizes: unit counts per layer, input layer first.
            activations: one activation name per non-input layer.
            loss_function: key into ``loss_functions`` / ``loss_derivatives``.
            learning_rate: step size used by ``update_parameters``.

        Raises:
            ValueError: if ``activations`` does not have exactly one entry per
                non-input layer.  ``forward`` indexes it by layer while ``test``
                reads its last entry, so a mismatch would make the two disagree.
        """
        if len(activations) != len(layer_sizes) - 1:
            raise ValueError(
                f"expected {len(layer_sizes) - 1} activations "
                f"(one per non-input layer), got {len(activations)}"
            )
        self.layer_sizes = layer_sizes
        self.activations = activations
        self.learning_rate = learning_rate
        self.loss_function = loss_function
        self.params = self.initialize_parameters()

    def initialize_parameters(self):
        """Return a dict of small random weights and zero biases for every layer."""
        params = {}
        for i in range(1, len(self.layer_sizes)):
            params['W' + str(i)] = np.random.randn(self.layer_sizes[i], self.layer_sizes[i-1]) * 0.01
            params['b' + str(i)] = np.zeros((self.layer_sizes[i], 1))
        return params

    def forward(self, X):
        """Run a forward pass.

        Returns:
            dict holding the input as ``'A0'`` and, for every layer ``i``, the
            pre-activation ``'Z{i}'`` and the activation ``'A{i}'``.
        """
        cache = {'A0': X}
        A = X
        L = len(self.layer_sizes) - 1
        for i in range(1, L + 1):
            Z = self.params['W' + str(i)].dot(A) + self.params['b' + str(i)]
            A = activation_functions[self.activations[i-1]](Z)
            cache['Z' + str(i)] = Z
            cache['A' + str(i)] = A
        return cache

    def _output_delta(self, cache, Y):
        """Return the loss gradient w.r.t. the output layer's pre-activation.

        The entries of ``loss_derivatives`` do not share one convention, so the
        result cannot simply be forwarded as dZ:

        * ``categorical_cross_entropy`` registers ``AL - Y``, which is used
          directly as dZ.
        * ``binary_cross_entropy`` registers ``(AL - Y) / (AL * (1 - AL))``.
          With a sigmoid output, the sigmoid derivative ``AL * (1 - AL)``
          cancels the denominator, so dZ is ``AL - Y``; computing it in that
          form avoids the division altogether.
        * every other combination applies the chain rule: the loss derivative
          is multiplied by the derivative of the output activation.
        """
        L = len(self.layer_sizes) - 1
        AL = cache['A' + str(L)]
        activation = self.activations[L - 1]

        if self.loss_function == 'categorical_cross_entropy':
            return loss_derivatives[self.loss_function](AL, Y)
        if self.loss_function == 'binary_cross_entropy' and activation == 'sigmoid':
            return AL - Y

        dAL = loss_derivatives[self.loss_function](AL, Y)
        if activation == 'softmax':
            # Softmax couples the outputs of a column, so apply its Jacobian:
            # dZ_i = A_i * (dA_i - sum_j dA_j * A_j).
            return AL * (dAL - np.sum(dAL * AL, axis=0, keepdims=True))
        return dAL * activation_derivatives[activation](cache['Z' + str(L)])

    def backprop(self, cache, Y, batch_size):
        """Compute parameter gradients for one batch.

        Args:
            cache: dict returned by ``forward`` for this batch.
            Y: targets; reshaped to the shape of the output activation.
            batch_size: number of samples the summed gradients are divided by.

        Returns:
            dict with ``'dW{i}'`` and ``'db{i}'`` for every layer ``i``.
        """
        gradients = {}
        L = len(self.layer_sizes) - 1
        Y = Y.reshape(cache['A' + str(L)].shape)

        dZ = self._output_delta(cache, Y)
        for i in range(L, 0, -1):
            if i < L:
                # Propagate through layer i+1's weights, then through layer i's activation.
                dA = self.params['W' + str(i+1)].T.dot(dZ)
                dZ = dA * activation_derivatives[self.activations[i-1]](cache['Z' + str(i)])
            gradients['dW' + str(i)] = 1./batch_size * dZ.dot(cache['A' + str(i-1)].T)
            gradients['db' + str(i)] = 1./batch_size * np.sum(dZ, axis=1, keepdims=True)

        return gradients

    # Basic SGD (Stochastic Gradient Descent)
    def update_parameters(self, gradients):
        """Apply one gradient-descent step to every weight and bias in place."""
        for i in range(1, len(self.layer_sizes)):
            self.params['W' + str(i)] -= self.learning_rate * gradients['dW' + str(i)]
            self.params['b' + str(i)] -= self.learning_rate * gradients['db' + str(i)]

    def compute_cost(self, AL, Y):
        """Evaluate the configured loss for predictions ``AL`` against ``Y``."""
        return loss_functions[self.loss_function](AL, Y)

    def make_predictions(self, X):
        """Return the output-layer activation for inputs ``X``."""
        cache = self.forward(X)
        predictions = cache['A' + str(len(self.layer_sizes) - 1)]
        return predictions # returns the output layer

    def train(self, X_train, Y_train, epochs, batch_size):
        """Train with shuffled mini-batches, printing the cost every 50 epochs.

        Args:
            X_train: inputs of shape ``(n_features, n_samples)``.
            Y_train: 2-D targets with one column per sample.
            epochs: number of passes over the training data.
            batch_size: maximum number of samples per mini-batch.
        """
        m = X_train.shape[1]
        for epoch in range(epochs):
            permutation = np.random.permutation(m)
            X_train_shuffled = X_train[:, permutation]
            Y_train_shuffled = Y_train[:, permutation]

            for i in range(0, m, batch_size):
                X_batch = X_train_shuffled[:, i:i+batch_size]
                Y_batch = Y_train_shuffled[:, i:i+batch_size]

                cache = self.forward(X_batch)
                # The final batch is shorter when m is not a multiple of
                # batch_size; normalise by the real sample count so its
                # gradient is not scaled down.
                gradients = self.backprop(cache, Y_batch, X_batch.shape[1])
                self.update_parameters(gradients)

            if epoch % 50 == 0:
                cache = self.forward(X_train)
                cost = self.compute_cost(cache['A' + str(len(self.layer_sizes) - 1)], Y_train)
                print(f"Epoch {epoch}, Cost: {cost}")

    def test(self, X_test, Y_test, metric='accuracy'):
        """Score the network on test data, print the score and return it.

        With a softmax output, predictions and ``Y_test`` are both reduced with
        ``argmax`` over axis 0.  With a sigmoid output, predictions are
        thresholded at 0.5 and ``Y_test`` is reshaped to match them.

        Args:
            X_test: inputs of shape ``(n_features, n_samples)``.
            Y_test: targets.
            metric: key into ``eval_metric``.
        """
        predictions = self.make_predictions(X_test)
        if self.activations[-1] == 'softmax':
            predictions = np.argmax(predictions, axis=0)
            Y_test = np.argmax(Y_test, axis=0)
        elif self.activations[-1] == 'sigmoid':
            predictions = (predictions > 0.5).astype(int)
            # Align the label layout with the predictions so that, e.g., (m, 1)
            # labels are not broadcast against (1, m) predictions into (m, m).
            Y_test = np.asarray(Y_test).reshape(predictions.shape)

        score = eval_metric[metric](predictions, Y_test)

        print(f"Test {metric.capitalize()}: {score:.4f}")
        return score
        