In [161]:
class ActivationFunctions:
    """Scalar activations, a row-wise softmax, and their derivatives.

    ``tanh_prime`` and ``sigmoid_prime`` take the activation's *output*
    (``y = f(z)``), not the pre-activation ``z``: they evaluate
    ``1 - y**2`` and ``y * (1 - y)`` respectively. ``relu_prime`` only
    tests the sign of its argument, which is the same for ``z`` and
    ``relu(z)`` whenever the result is non-zero.
    """

    @staticmethod
    def tanh(x):
        """Return the hyperbolic tangent of ``x``."""
        # math.tanh saturates to +/-1.0 for large |x|, whereas computing
        # (e^{2x} - 1) / (e^{2x} + 1) by hand makes math.exp raise
        # OverflowError once 2x exceeds roughly 709.
        return math.tanh(x)

    @staticmethod
    def tanh_prime(x):
        """Return d tanh / dz given ``x = tanh(z)`` (the activation output)."""
        return 1 - x ** 2

    @staticmethod
    def relu(x):
        """Return ``x`` when it is positive, otherwise ``0``."""
        return max(x, 0)

    @staticmethod
    def relu_prime(x):
        """Return ``1`` for strictly positive ``x`` and ``0`` otherwise."""
        return 1 if x > 0 else 0

    @staticmethod
    def sigmoid(x):
        """Return the logistic function ``1 / (1 + e^{-x})``."""
        # Only ever exponentiate a non-positive number so math.exp cannot
        # overflow for inputs of large magnitude.
        if x >= 0:
            return 1 / (1 + math.exp(-x))
        e = math.exp(x)
        return e / (1 + e)

    @staticmethod
    def sigmoid_prime(x):
        """Return d sigmoid / dz given ``x = sigmoid(z)`` (the activation output)."""
        return x * (1 - x)

    @staticmethod
    def softmax(x):
        """Apply softmax to each row (vector) of the matrix ``x``.

        ``x`` is an iterable of rows; the result is a list with one list of
        probabilities per row. An empty row yields an empty list.
        """
        output = []
        for row in x:
            if not row:
                output.append([])
                continue
            # Subtracting the row maximum leaves the result mathematically
            # unchanged but keeps every exponent <= 0, avoiding overflow.
            shift = max(row)
            exp_values = [math.exp(value - shift) for value in row]
            sum_exp = sum(exp_values)
            output.append([value / sum_exp for value in exp_values])
        return output

    @staticmethod
    def softmax_prime(softmax_output):
        """Return the Jacobian of softmax for a single output vector.

        ``softmax_output`` is one flat vector ``s`` of softmax outputs. Entry
        ``[i][j]`` of the result is ``s[i] * (1 - s[i])`` when ``i == j`` and
        ``-s[i] * s[j]`` otherwise.
        """
        n = len(softmax_output)
        return [
            [
                s_i * (1 - s_i) if i == j else -s_i * softmax_output[j]
                for j in range(n)
            ]
            for i, s_i in enumerate(softmax_output)
        ]


# Registry used to look activations up by name. The derivative of activation
# "<name>" is registered under "<name>_prime".
activation_functions = {
    name: getattr(ActivationFunctions, name)
    for name in (
        "tanh",
        "tanh_prime",
        "relu",
        "relu_prime",
        "sigmoid",
        "sigmoid_prime",
        "softmax",
        "softmax_prime",
    )
}


In [167]:
class ErrorFunction:
    """Loss functions over flat sequences of numbers, with their gradients."""

    @staticmethod
    def _as_matched_lists(output, target):
        """Return ``output`` and ``target`` as lists after validating them.

        Raises:
            ValueError: if the two sequences are empty or differ in length.
        """
        output, target = list(output), list(target)
        if len(output) != len(target):
            raise ValueError(
                f"output and target must have the same length "
                f"(got {len(output)} and {len(target)})"
            )
        if not target:
            raise ValueError("output and target must not be empty")
        return output, target

    @staticmethod
    def mse(output, target):
        """Calculates the Mean Squared Error (MSE).

        Returns ``sum((o - t)**2) / n`` over the paired elements.

        Raises:
            ValueError: if the inputs are empty or differ in length (a plain
                ``zip`` would silently drop the unmatched tail while still
                dividing by ``len(target)``).
        """
        output, target = ErrorFunction._as_matched_lists(output, target)
        return sum((o - t) ** 2 for o, t in zip(output, target)) / len(target)

    @staticmethod
    def mse_prime(output, target):
        """Calculates the gradient of MSE with respect to the output.

        Element ``i`` of the result is ``2 * (output[i] - target[i]) / n``,
        i.e. the exact derivative of the mean computed by ``mse``.

        Raises:
            ValueError: if the inputs are empty or differ in length.
        """
        output, target = ErrorFunction._as_matched_lists(output, target)
        n = len(target)
        return [2 * (o - t) / n for o, t in zip(output, target)]

# Registry used to look loss functions up by name. The gradient of loss
# "<name>" is registered under "<name>_prime".
err_functions = dict(
    mse=ErrorFunction.mse,
    mse_prime=ErrorFunction.mse_prime,
)

In [169]:
import random
import math

class DenseLayer:
    """Fully connected layer operating on plain Python lists.

    ``weights[i][j]`` connects input ``i`` to output ``j``; ``biases[j]`` is
    added to output ``j`` before the activation is applied.
    """

    def __init__(self,nin,nout, activation_func="tanh"):
        """Create a layer with ``nin`` inputs and ``nout`` outputs.

        Weights and biases are drawn uniformly from [-1, 1]. The activation
        and its derivative are looked up in ``activation_functions`` under
        ``activation_func`` and ``activation_func + "_prime"``.
        """
        self.weights = [[random.uniform(-1, 1) for _ in range(nout)] for _ in range(nin)]
        self.biases = [random.uniform(-1, 1) for _ in range(nout)]
        self.activation_func = activation_functions[activation_func]
        self.activation_func_prime = activation_functions[activation_func + "_prime"]
        # Softmax works on the whole output vector instead of element by
        # element, so forward/backward need to treat it separately.
        self._is_softmax = activation_func == "softmax"

    def set_weights(self,weights):
        """Replace the weight matrix (indexed ``[input][output]``)."""
        self.weights = weights

    def set_biases(self,biases):
        """Replace the bias vector (one entry per output)."""
        self.biases = biases

    def forward(self, inputs):
        """Performs the forward pass and returns the list of activations.

        The inputs and the resulting outputs are stored on the layer for use
        by ``backward``.

        Raises:
            ValueError: if ``len(inputs)`` differs from the number of weight rows.
        """
        if len(inputs) != len(self.weights):
            raise ValueError(
                f"expected {len(self.weights)} inputs, got {len(inputs)}"
            )
        self.input = inputs

        pre_activations = []
        for j, bias in enumerate(self.biases):
            total = bias
            for value, row in zip(inputs, self.weights):
                total += value * row[j]
            pre_activations.append(total)

        if self._is_softmax:
            # The softmax helper is called with a matrix (list of rows), so
            # wrap the single vector and unwrap the single result row.
            self.output = self.activation_func([pre_activations])[0]
        else:
            self.output = [self.activation_func(z) for z in pre_activations]

        return self.output

    def backward(self, d_output=None):
        """Back-propagate a gradient through the most recent forward pass.

        Args:
            d_output: gradient of the objective with respect to each output
                of this layer. When omitted, a vector of ones is used, which
                yields the gradient of the *sum* of the outputs.

        Returns:
            ``(d_weights, d_biases, d_input)``: gradients with respect to the
            weights (same layout as ``self.weights``), the biases, and the
            layer input.

        Raises:
            ValueError: if ``d_output`` does not have one entry per output.
        """
        n_out = len(self.biases)
        if d_output is None:
            d_output = [1] * n_out
        elif len(d_output) != n_out:
            raise ValueError(f"expected {n_out} output gradients, got {len(d_output)}")

        if self._is_softmax:
            # The derivative helper is called with the whole output vector
            # and returns the full Jacobian, where jacobian[i][j] is
            # d output[i] / d pre_activation[j].
            jacobian = self.activation_func_prime(self.output)
            d_pre = [
                sum(d_output[i] * jacobian[i][j] for i in range(n_out))
                for j in range(n_out)
            ]
        else:
            # Element-wise activations: the derivative helpers are called
            # with the stored activation output.
            d_pre = [
                self.activation_func_prime(out) * upstream
                for out, upstream in zip(self.output, d_output)
            ]

        d_biases = list(d_pre)
        d_weights = [[value * d for d in d_pre] for value in self.input]
        d_input = [sum(w * d for w, d in zip(row, d_pre)) for row in self.weights]

        return (d_weights, d_biases, d_input)


class MLP:
    """Feed-forward network made of ``DenseLayer`` objects, trained by per-sample gradient descent."""

    def __init__(self, layer_sizes, activation_funcs, err_func="mse"):
        """Build one ``DenseLayer`` per consecutive pair in ``layer_sizes``.

        Args:
            layer_sizes: neuron counts, input layer first.
            activation_funcs: activation name for each layer, in order.
            err_func: key into ``err_functions``; its gradient is looked up
                under ``err_func + "_prime"``.

        Raises:
            ValueError: if fewer activation names than layers are given.
        """
        n_layers = len(layer_sizes) - 1
        if len(activation_funcs) < n_layers:
            raise ValueError(
                f"need {n_layers} activation functions, got {len(activation_funcs)}"
            )
        self.err_func = err_functions[err_func]
        self.err_func_prime = err_functions[err_func + "_prime"]
        self.layers = [
            DenseLayer(layer_sizes[i], layer_sizes[i + 1], activation_funcs[i])
            for i in range(n_layers)
        ]

    def forward(self, inputs):
        """Perform the forward pass through the network."""
        for layer in self.layers:
            inputs = layer.forward(inputs)
        return inputs

    def backward(self, d_output, learning_rate=0.01):
        """Back-propagate ``d_output`` and apply one gradient-descent step.

        Args:
            d_output: gradient of the loss with respect to the network output.
            learning_rate: step size for the update of every layer.

        Returns:
            The gradient of the loss with respect to the network input.
        """
        gradient = d_output
        for layer in reversed(self.layers):
            # Each layer receives the gradient flowing out of the layer that
            # follows it; its own input gradient is computed with the
            # pre-update weights and handed to the previous layer.
            d_weights, d_biases, gradient = layer.backward(gradient)
            layer.weights = [
                [w - learning_rate * dw for w, dw in zip(row, d_row)]
                for row, d_row in zip(layer.weights, d_weights)
            ]
            layer.biases = [b - learning_rate * db for b, db in zip(layer.biases, d_biases)]
        return gradient

    def train(self, X, y, epochs, learning_rate):
        """Run ``epochs`` passes over the data, updating after every sample.

        Raises:
            ValueError: if ``X`` and ``y`` differ in length.
        """
        if len(X) != len(y):
            raise ValueError(f"X has {len(X)} samples but y has {len(y)} targets")
        for epoch in range(epochs):
            for sample, target in zip(X, y):
                output = self.forward(sample)
                d_output = self.err_func_prime(output, target)
                self.backward(d_output, learning_rate)
            print(f"Epoch {epoch + 1}/{epochs} completed.")


# Example usage: build a 2-3-1 network and run a few training epochs on toy data.
layer_sizes = [2, 3, 1]  # Input layer with 2 neurons, hidden layer with 3 neurons, output layer with 1 neuron
activation_funcs = ["tanh", "sigmoid"]  # One activation per DenseLayer: tanh for the hidden layer, sigmoid for the output layer

# DenseLayer.__init__ draws weights and biases with random.uniform, so results
# differ between runs unless the random module is seeded beforehand.
mlp = MLP(layer_sizes, activation_funcs)

# Sample data for training (X: input data, y: target output)
X = [[0.1, 0.2], [0.2, 0.3], [0.3, 0.4]]  # Three samples with two features each
y = [[0.5], [0.6], [0.7]]  # Example target outputs, one single-element target per sample

learning_rate = 0.01
epochs = 10
# train() prints one "Epoch k/N completed." line per epoch.
mlp.train(X, y, epochs, learning_rate)




Epoch 1/10 completed.
Epoch 2/10 completed.
Epoch 3/10 completed.
Epoch 4/10 completed.
Epoch 5/10 completed.
Epoch 6/10 completed.
Epoch 7/10 completed.
Epoch 8/10 completed.
Epoch 9/10 completed.
Epoch 10/10 completed.


In [163]:
def compare_with_torch(inputs, weights, biases, act_func):
    """Check one DenseLayer forward/backward pass against PyTorch autograd.

    A DenseLayer is built with the given weights and biases and run on
    ``inputs``. The same computation, ``act(w.T @ x + b)``, is evaluated with
    double-precision torch tensors, and the layer's output and the gradients
    returned by ``layer.backward()`` are compared with the gradients autograd
    produces for the *sum* of the torch outputs.

    ``act_func`` must name both a DenseLayer activation and a function
    available as ``torch.<act_func>``.

    Raises:
        AssertionError: if any value differs by more than ``atol=1e-6``.
    """

    def is_close(reference, ours):
        # The Python-side value is wrapped in an extra list, giving it a
        # leading dimension of 1 that broadcasts against the reference.
        candidate = torch.tensor([ours], dtype=torch.float64)
        return torch.allclose(reference, candidate, atol=1e-6)

    dense = DenseLayer(len(inputs), len(biases), act_func)
    dense.set_weights(weights)
    dense.set_biases(biases)
    dense_out = dense.forward(inputs)

    x, w, b = (
        torch.tensor(values, dtype=torch.double, requires_grad=True)
        for values in (inputs, weights, biases)
    )
    torch_out = getattr(torch, act_func)(w.T @ x + b)

    assert is_close(torch_out, dense_out)

    # Reduce to a scalar so autograd can populate .grad on the leaf tensors.
    torch_out.sum().backward()
    reference_grads = (w.grad.detach(), b.grad.detach(), x.grad.detach())

    # DenseLayer.backward returns its gradients in (weights, biases, input) order.
    for reference, ours in zip(reference_grads, dense.backward()):
        assert is_close(reference, ours)



In [165]:
# Each case is (inputs, weights, biases, activation name). Weights are laid out
# as weights[input_index][output_index], matching DenseLayer.
test_cases = [
    # Single output neuron; its pre-activation is 2*(-3) + 0*1 + 6.8813735870195432.
    ([2, 0], [[-3], [1]], [6.8813735870195432], "tanh"),
    # Two inputs feeding two outputs.
    ([3, 2], [[2, -1], [0.5, 3]], [1, -2], "relu"),
    # Four inputs feeding three outputs; the same weights and biases are
    # checked under each of the three activations.
    ([3, 2, 5, 7], [[2, -1, 0.5], [0.5, 3, 1], [0.2, 0.3, 1], [0.5, 0.3, 0.2]], [0.5, 0.4, 0.1], "tanh"),
    ([3, 2, 5, 7], [[2, -1, 0.5], [0.5, 3, 1], [0.2, 0.3, 1], [0.5, 0.3, 0.2]], [0.5, 0.4, 0.1], "relu"),
    ([3, 2, 5, 7], [[2, -1, 0.5], [0.5, 3, 1], [0.2, 0.3, 1], [0.5, 0.3, 0.2]], [0.5, 0.4, 0.1], "sigmoid"),
    
]

# compare_with_torch asserts on mismatch, so "Passed!" is printed only after a
# case has agreed with PyTorch; the first failing case stops the loop.
for inputs, weights, biases, act_func in test_cases:
    compare_with_torch(inputs, weights, biases, act_func)
    print("Passed!")


Passed!
Passed!
Passed!
Passed!
Passed!
