In [1]:
class ActivationFunctions:
    """Namespace for element-wise activation functions that operate on Tensor objects."""

    @staticmethod
    def tanh(x):
        """Apply the hyperbolic tangent to every element of a row-structured tensor.

        Args:
            x: Tensor whose ``data`` is a list of rows (each row a list of numbers).

        Returns:
            A new Tensor holding ``math.tanh`` of every element. Its ``_prev`` is
            ``[x]`` and its ``_backward`` accumulates
            ``(1 - tanh(x)^2) * result.grad`` into ``x.grad``, so gradients
            propagate through the activation instead of stopping at it.
        """
        result = Tensor([[math.tanh(value) for value in row] for row in x.data])

        def _backward():
            # Lazily create the gradient buffer, mirroring how the other ops do it.
            if x.grad is None:
                x.grad = [[0 for _ in row] for row in x.data]
            # d/dx tanh(x) = 1 - tanh(x)^2, scaled by the upstream gradient.
            for i, row in enumerate(result.data):
                for j, activated in enumerate(row):
                    x.grad[i][j] += (1 - activated * activated) * result.grad[i][j]

        result._backward = _backward
        result._prev = [x]
        return result


# Registry mapping an activation name to its implementation.
# Only "tanh" is registered here; looking up any other name raises KeyError.
activation_functions = {
    "tanh": ActivationFunctions.tanh,
}


In [2]:
class ErrorFunction:
    """Loss functions and their derivatives."""

    @staticmethod
    def mse(output, target):
        """Mean squared error between a row-structured output and a target.

        The target is aligned to the output without being modified:

        * a flat list with one value per output column is shared by every row;
        * a flat list with one value per output row is used as a column when
          the output has a single column;
        * a 2-D target of the same shape is used as is;
        * a single-row 2-D target is shared by every row.

        Args:
            output: object with ``data`` (list of rows) and ``shape``.
            target: object with ``data`` (flat list or list of rows) and ``shape``.

        Returns:
            The sum of squared differences divided by
            ``output.shape[0] * output.shape[1]``.

        Raises:
            ValueError: if the target cannot be aligned with the output.
        """
        target_rows = ErrorFunction._target_rows(output, target)
        error_sum = 0.0
        for i in range(len(output.data)):
            for j in range(len(output.data[0])):
                error_sum += (output.data[i][j] - target_rows[i][j]) ** 2

        return error_sum / (output.shape[0] * output.shape[1])

    @staticmethod
    def _target_rows(output, target):
        """Return the target as a list of rows matching ``output``, leaving ``target`` untouched."""
        rows = len(output.data)
        cols = len(output.data[0])
        values = target.data

        if not isinstance(values[0], list):
            if len(values) == cols:
                # Row vector: the same target values apply to every output row.
                return [values] * rows
            if cols == 1 and len(values) == rows:
                # One scalar target per sample for a single-output network.
                return [[value] for value in values]
            raise ValueError("Target shape is incompatible with output shape.")

        if output.shape == target.shape:
            return values
        if len(values) == 1 and len(values[0]) == cols:
            return [values[0]] * rows
        raise ValueError("Target shape is incompatible with output shape.")

    @staticmethod
    def mse_prime(output, target):
        """Calculates the gradient of MSE with respect to the output.

        Both arguments are iterated pairwise, so they are expected to be flat
        iterables of numbers; the result is the list of ``o - t`` differences
        (no scaling factor is applied).
        """
        return [(o - t) for o, t in zip(output, target)]

# Registry mapping an error-function name to its implementation:
# "mse" is the loss itself and "mse_prime" its derivative helper.
err_functions = {
    "mse": ErrorFunction.mse,
    "mse_prime": ErrorFunction.mse_prime
}

In [3]:
class Tensor:
    """Minimal list-backed tensor with reverse-mode automatic differentiation.

    Attributes:
        data: the wrapped Python list (a flat list or a list of rows).
        shape: ``(rows, cols)`` derived from ``data`` by ``determine_shape``.
        grad: accumulated gradient; ``None`` until a backward pass reaches the tensor.
        _backward: closure pushing this tensor's ``grad`` into the tensors in ``_prev``.
        _prev: the tensors this one was computed from.
    """

    def __init__(self, data):
        """Wrap ``data``; the tensor starts with no gradient and no parents."""
        self.set_data(data)
        self.grad = None
        self._backward = lambda: None
        self._prev = []

    def __repr__(self):
        return f"Tensor(data={self.data}, grad={self.grad}, prev={self._prev} shape={self.shape})"

    def set_data(self, data):
        """Replace the wrapped list and recompute ``shape``."""
        self.data = data
        self.determine_shape()

    def determine_shape(self):
        """Set ``shape`` to ``(rows, cols)``.

        Empty data gives ``(0, 0)``, a list of rows gives
        ``(len(data), len(data[0]))`` and a flat list gives ``(len(data), 1)``.
        """
        data = self.data
        if len(data) == 0:
            # Guard first: indexing data[0] on an empty list would raise.
            self.shape = (0, 0)
        elif isinstance(data[0], list):
            self.shape = (len(data), len(data[0]))
        else:
            self.shape = (len(data), 1)

    def dot(self, other):
        """Matrix product ``self @ other`` of two row-structured tensors.

        Returns:
            A new Tensor with ``_prev == [self, other]`` whose ``_backward``
            accumulates ``grad @ other^T`` into ``self.grad`` and
            ``self^T @ grad`` into ``other.grad``.

        Raises:
            ValueError: if either operand is empty or the inner dimensions differ.
        """
        v1 = self.data
        v2 = other.data

        # Validate before reading v2[0] so bad operands give the documented error.
        if len(v1) == 0 or len(v2) == 0 or len(v1[0]) != len(v2):
            raise ValueError("Incompatible dimensions for matrix multiplication.")

        m = len(v1)
        n = len(v2[0])
        p = len(v2)

        result = [[sum(v1[i][k] * v2[k][j] for k in range(p)) for j in range(n)]
                  for i in range(m)]
        output = Tensor(result)

        def _backward():
            if self.grad is None:
                self.grad = [[0 for _ in range(len(self.data[0]))] for _ in range(len(self.data))]
            if other.grad is None:
                other.grad = [[0 for _ in range(len(other.data[0]))] for _ in range(len(other.data))]

            # Grad w.r.t. self (inputs): output.grad @ other^T
            for i in range(m):
                for k in range(p):
                    for j in range(n):
                        self.grad[i][k] += output.grad[i][j] * other.data[k][j]

            # Grad w.r.t. other (weights): self^T @ output.grad
            for k in range(p):
                for j in range(n):
                    for i in range(m):
                        other.grad[k][j] += output.grad[i][j] * self.data[i][k]

        output._backward = _backward
        output._prev = [self, other]

        return output

    def broadcast(self, m):
        """Replace ``data`` in place with ``m`` independent copies of the current data."""
        # Copy each repetition so the resulting rows do not alias one list object.
        self.set_data([list(self.data) for _ in range(m)])

    def _rows_matching(self, other):
        """Align ``other`` with ``self`` for element-wise ops without mutating it.

        Returns:
            ``(rows, layout)`` where ``rows[i][j]`` is the value paired with
            ``self.data[i][j]`` and ``layout`` is ``"full"`` (same shape),
            ``"flat"`` (flat list shared by every row) or ``"row"``
            (single-row 2-D tensor shared by every row).

        Raises:
            ValueError: if ``other`` cannot be aligned with ``self``.
        """
        height = len(self.data)
        width = len(self.data[0])
        values = other.data

        if len(values) > 0 and not isinstance(values[0], list):
            if len(values) != width:
                raise ValueError("Incompatible shapes for addition.")
            return [values] * height, "flat"
        if other.shape == self.shape:
            return values, "full"
        if len(values) == 1 and len(values[0]) == width:
            return [values[0]] * height, "row"
        raise ValueError("Incompatible shapes for addition.")

    def add(self, other):
        """Element-wise sum of ``self`` and ``other``.

        ``other`` may have the same shape as ``self``, or be a flat list /
        single-row tensor with one value per column, in which case it is added
        to every row. ``other`` itself is never modified, so the same bias
        tensor can be reused with batches of different sizes.

        Returns:
            A new Tensor with ``_prev == [self, other]``. Its ``_backward``
            adds the upstream gradient to ``self.grad``; for a broadcast
            ``other`` the upstream gradient is summed over the rows so that
            ``other.grad`` has the same layout as ``other.data``.

        Raises:
            ValueError: if the shapes cannot be aligned.
        """
        other_rows, layout = self._rows_matching(other)
        rows = len(self.data)
        cols = len(self.data[0])

        result = [[self.data[i][j] + other_rows[i][j] for j in range(cols)]
                  for i in range(rows)]
        output = Tensor(result)

        def _backward():
            # Gradient w.r.t. self (the first term in the addition)
            if self.grad is None:
                self.grad = [[0 for _ in range(cols)] for _ in range(rows)]
            for i in range(rows):
                for j in range(cols):
                    self.grad[i][j] += output.grad[i][j]

            # Gradient w.r.t. other (the second term in the addition)
            if layout == "full":
                if other.grad is None:
                    other.grad = [[0 for _ in range(cols)] for _ in range(rows)]
                for i in range(rows):
                    for j in range(cols):
                        other.grad[i][j] += output.grad[i][j]
            else:
                # A broadcast operand contributed to every row, so its
                # gradient is the column-wise sum of the upstream gradient.
                if other.grad is None:
                    other.grad = [0 for _ in range(cols)] if layout == "flat" \
                        else [[0 for _ in range(cols)]]
                sink = other.grad if layout == "flat" else other.grad[0]
                for i in range(rows):
                    for j in range(cols):
                        sink[j] += output.grad[i][j]

        output._backward = _backward
        output._prev = [self, other]

        return output

    def transpose(self):
        """Return a new Tensor holding the transpose of row-structured ``data``."""
        transposed_data = [[self.data[j][i] for j in range(self.shape[0])]
                           for i in range(self.shape[1])]
        return Tensor(transposed_data)

    def _topological_order(self):
        """Return every tensor reachable through ``_prev``, parents before children."""
        order = []
        visited = set()
        stack = [(self, False)]
        while stack:
            node, expanded = stack.pop()
            if expanded:
                order.append(node)
                continue
            if id(node) in visited:
                continue
            visited.add(id(node))
            # Revisit the node once all of its parents have been emitted.
            stack.append((node, True))
            for parent in node._prev:
                if id(parent) not in visited:
                    stack.append((parent, False))
        return order

    def backward(self):
        """Run reverse-mode differentiation starting from this tensor.

        If this tensor has no gradient yet it is seeded with ones. Every
        reachable tensor then has its ``_backward`` closure invoked exactly
        once, children before parents, so a tensor used by several consumers
        receives all of its gradient before passing it on. Tensors that no
        gradient reached (``grad is None``) are skipped.
        """
        if self.grad is None:
            self.grad = [[1.0 for _ in range(len(self.data[0]))] for _ in range(len(self.data))]
        for node in reversed(self._topological_order()):
            if node.grad is not None:
                node._backward()



In [91]:
import random
import math

class DenseLayer:
    """Fully connected layer computing ``inputs.dot(weights).add(biases)``.

    ``weights`` is a Tensor with ``nin`` rows and ``nout`` columns and
    ``biases`` a Tensor holding a flat list of ``nout`` values.
    """

    def __init__(self, nin, nout, act_func="tanh"):
        """Create the layer with deterministic initial parameters.

        Args:
            nin: number of input features (rows of the weight matrix).
            nout: number of output features (columns of the weight matrix).
            act_func: key into ``activation_functions``; an empty string or
                ``None`` selects no activation.

        Raises:
            KeyError: if ``act_func`` is not a registered activation name.
        """
        # Every weight row is 1..nout and every bias is 1.
        self.weights = Tensor([[i + 1 for i in range(nout)] for _ in range(nin)])
        self.biases = Tensor([1 for _ in range(nout)])
        self.act_func = activation_functions[act_func] if act_func else None

    def set_parameters(self, weights, biases):
        """Replace both parameter tensors with fresh Tensors built from the given lists."""
        self.weights = Tensor(weights)
        self.biases = Tensor(biases)

    def set_weights(self, weights):
        """Replace only the weight tensor (counterpart of ``set_biases``)."""
        self.weights = Tensor(weights)

    def set_biases(self, biases):
        """Replace only the bias tensor."""
        self.biases = Tensor(biases)

    def get_parameters(self):
        """Return the ``(weights, biases)`` Tensor pair."""
        return self.weights, self.biases

    def forward(self, inputs):
        """Return the affine output ``inputs.dot(weights).add(biases)``.

        NOTE(review): ``self.act_func`` is stored by the constructor but is not
        applied here, so the layer output is purely linear — confirm whether
        that is intended before relying on the activation setting.
        """
        return inputs.dot(self.weights).add(self.biases)
        
class MLP:
    """A stack of DenseLayer objects applied one after another."""

    def __init__(self, layer_sizes, err_func="mse"):
        """Build one DenseLayer per consecutive pair of entries in ``layer_sizes``.

        Args:
            layer_sizes: sequence of layer widths, input width first.
            err_func: key into ``err_functions`` selecting the error function.
        """
        self.layers = []
        self.err_func = err_functions[err_func]
        for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:]):
            self.layers.append(DenseLayer(fan_in, fan_out))

    def set_parameters(self, weights, biases):
        """Hand the i-th weight/bias pair to the i-th layer (stops at the shortest input)."""
        for target_layer, layer_weights, layer_biases in zip(self.layers, weights, biases):
            target_layer.set_parameters(layer_weights, layer_biases)

    def forward(self, inputs):
        """Feed ``inputs`` through every layer in order and return the last output."""
        activations = inputs
        for layer in self.layers:
            activations = layer.forward(activations)
        return activations

    def get_parameters(self):
        """Return ``(all_weights, all_biases)``: two lists with one entry per layer."""
        per_layer = [layer.get_parameters() for layer in self.layers]
        all_weights = [weights for weights, _ in per_layer]
        all_biases = [biases for _, biases in per_layer]
        return all_weights, all_biases

    def train(self, x, y, batch_size):
        """Run a forward and backward pass on the first batch and return its output.

        NOTE(review): the ``return`` sits inside the batch loop, so only the
        first batch is ever processed; ``y_batch`` and ``layer`` are built but
        not used, and no error function or parameter update is applied —
        confirm the intended training step before extending this.

        Returns:
            The output Tensor of the first batch, or ``None`` when the loop
            body never runs.
        """
        sample_count = len(x)
        for start in range(0, sample_count, batch_size):
            stop = start + batch_size
            x_batch = Tensor(x[start:stop])
            y_batch = Tensor(y[start:stop])  # currently unused

            output = self.forward(x_batch)
            layer = self.layers[0]  # currently unused

            output.backward()
            return output


In [127]:
import torch
import torch.nn as nn
import random

class MLP_TORCH(nn.Module):
    """PyTorch reference network: a chain of ``nn.Linear`` layers with no activation."""

    def __init__(self, layer_sizes):
        """Create one ``nn.Linear`` per consecutive pair of entries in ``layer_sizes``."""
        super().__init__()
        linear_layers = [
            nn.Linear(fan_in, fan_out)
            for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:])
        ]
        self.network = nn.ModuleList(linear_layers)

    def forward(self, x):
        """Apply every linear layer in order and return the final tensor."""
        activations = x
        for linear in self.network:
            activations = linear(activations)
        return activations

    def set_parameters(self, weights, biases):
        """Overwrite each layer's parameters from nested Python lists.

        ``weights[i]`` is given with one row per input feature and is
        transposed before being stored as the layer's weight; ``biases[i]``
        is stored unchanged. Both are converted to ``float32``.
        """
        with torch.no_grad():
            for position in range(len(self.network)):
                linear = self.network[position]
                weight_matrix = torch.tensor(weights[position], dtype=torch.float32)
                linear.weight = nn.Parameter(weight_matrix.T)
                bias_vector = torch.tensor(biases[position], dtype=torch.float32)
                linear.bias = nn.Parameter(bias_vector)

    def get_parameters(self):
        """Return ``(weights, biases)``: the layers' parameter tensors, in layer order."""
        weights = [linear.weight for linear in self.network]
        biases = [linear.bias for linear in self.network]
        return weights, biases

def compare_weights_and_biases_values(mlp_torch, mlp):
    """Print, layer by layer, whether two networks hold the same parameters.

    Args:
        mlp_torch: reference object whose ``get_parameters()`` returns lists of
            torch tensors; each weight is transposed before comparison.
        mlp: object whose ``get_parameters()`` returns lists of objects with a
            ``data`` attribute holding nested Python lists.

    Raises:
        AssertionError: if the weights of any layer differ beyond the
            tolerance. A bias mismatch is only reported, not raised.
    """
    expected_weights, expected_biases = mlp_torch.get_parameters()
    weights, biases = mlp.get_parameters()

    for i, (expected_w, weight) in enumerate(zip(expected_weights, weights)):
        # Convert with the reference dtype: torch.allclose rejects mixed dtypes,
        # which integer-valued list data would otherwise produce.
        w = torch.tensor(weight.data, dtype=expected_w.dtype)
        comparison_result = torch.allclose(expected_w.T, w, rtol=1e-04, atol=1e-04)
        print(f"Comparison result for weights of layer {i+1}: {comparison_result}")
        if not comparison_result:
            print(f"Difference in weights for layer {i+1}:")
            print(f"Expected: {expected_w}")
            print(f"Provided: {w}")
            # Explicit raise so the check also fires when asserts are disabled (-O).
            raise AssertionError(f"Weights of layer {i+1} do not match.")
        print()

    for i, (expected_b, bias) in enumerate(zip(expected_biases, biases)):
        b = torch.tensor(bias.data, dtype=expected_b.dtype)
        comparison_result = torch.allclose(expected_b.flatten(), b.flatten(), rtol=1e-04, atol=1e-04)
        print(f"Comparison result for biases of layer {i+1}: {comparison_result}")
        if not comparison_result:
            # NOTE(review): unlike the weights branch this mismatch does not
            # raise — confirm whether that asymmetry is intended.
            print(f"Difference in biases for layer {i+1}:")
            print(f"Expected: {expected_b}")
            print(f"Provided: {b.flatten()}")
        print()


def compare_mlp(layer_sizes, x, y, batch_size=1):
    """Give an MLP and an MLP_TORCH identical random parameters and print both outputs.

    Args:
        layer_sizes: sequence of layer widths, input width first.
        x: list of input rows.
        y: targets; accepted for signature compatibility but not used here.
        batch_size: number of rows fed to both networks per forward pass.
    """
    weights = []
    biases = []
    for l in range(len(layer_sizes) - 1):
        # One weight row per input feature, one column per output feature,
        # all drawn uniformly from [-1, 1].
        weights.append([[random.uniform(-1, 1) for _ in range(layer_sizes[l + 1])] for _ in range(layer_sizes[l])])
        biases.append([random.uniform(-1, 1) for _ in range(layer_sizes[l + 1])])
    mlp = MLP(layer_sizes)
    mlp.set_parameters(weights, biases)

    mlp_torch = MLP_TORCH(layer_sizes)
    mlp_torch.set_parameters(weights, biases)

    compare_weights_and_biases_values(mlp_torch, mlp)

    # Step by batch_size so consecutive batches do not overlap.
    for start in range(0, len(x), batch_size):
        x_batch = x[start:start + batch_size]
        print("mlp",mlp.forward(Tensor(x_batch)).data)
        print("***********")
        # Force float32 so integer-valued inputs are accepted by the linear layers.
        print("torch",mlp_torch.forward(torch.tensor(x_batch, dtype=torch.float32)).data)


    

# Demo run: layer sizes 3-3-1, four input rows of three features each and four
# targets, using the default batch_size of 1.
compare_mlp([3,3,1],[
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
],[1.0, -1.0, -1.0, 1.0])
    
    

Comparison result for weights of layer 1: True

Comparison result for weights of layer 2: True

Comparison result for biases of layer 1: True

Comparison result for biases of layer 2: True

mlp [[0.03301823812879712]]
***********
torch tensor([[0.0330]])
mlp [[-0.6542797201119002]]
***********
torch tensor([[-0.6543]])
mlp [[0.8726628909616414]]
***********
torch tensor([[0.8727]])
mlp [[-0.10182294515073698]]
***********
torch tensor([[-0.1018]])


In [86]:
import torch
def compare_with_torch(inputs, weights, biases, act_func):
    """Check a DenseLayer's output and gradients against PyTorch autograd.

    The reference value is ``getattr(torch, act_func)(w.T @ x + b)`` computed
    in double precision; gradients come from back-propagating the sum of that
    reference output.

    NOTE(review): this calls ``layer.set_weights`` and ``layer.backward`` and
    passes ``inputs`` to ``layer.forward`` as a plain list — confirm that the
    DenseLayer in scope supports all three before running it.

    Args:
        inputs: flat list of input values.
        weights: nested list with one row per input value.
        biases: flat list with one value per output.
        act_func: activation name, used both for DenseLayer and as a torch
            function name.

    Raises:
        AssertionError: if the output or any gradient differs by more than 1e-6.
    """
    layer = DenseLayer(len(inputs), len(biases), act_func)

    layer.set_weights(weights)
    layer.set_biases(biases)
    output = layer.forward(inputs)

    def as_leaf(values):
        # Double-precision leaf tensor that records gradients.
        return torch.tensor(values, dtype=torch.double, requires_grad=True)

    x = as_leaf(inputs)
    w = as_leaf(weights)
    b = as_leaf(biases)
    print(x)

    pre_activation = torch.matmul(w.T, x) + b
    expected_output = getattr(torch, act_func)(pre_activation)

    assert torch.allclose(expected_output, torch.tensor([output], dtype=torch.float64), atol=1e-6)

    expected_output.sum().backward()
    expected_w_grad = w.grad.detach()
    expected_b_grad = b.grad.detach()
    expected_x_grad = x.grad.detach()

    w_grad, b_grad, x_grad = layer.backward()
    gradient_pairs = (
        (expected_w_grad, w_grad),
        (expected_b_grad, b_grad),
        (expected_x_grad, x_grad),
    )
    for expected_grad, actual_grad in gradient_pairs:
        assert torch.allclose(expected_grad, torch.tensor([actual_grad], dtype=torch.float64), atol=1e-6)



In [252]:
# Each case is (inputs, weights, biases, activation name), unpacked in that
# order by the loop below and passed to compare_with_torch.
# NOTE(review): "relu" and "sigmoid" are not keys of the activation_functions
# registry defined earlier in this notebook (it only lists "tanh") — verify
# those activations are registered before expecting these cases to run.
test_cases = [
    ([2, 0], [[-3], [1]], [6.8813735870195432], "tanh"),
    ([3, 2], [[2, -1], [0.5, 3]], [1, -2], "relu"),
    ([3, 2, 5, 7], [[2, -1, 0.5], [0.5, 3, 1], [0.2, 0.3, 1], [0.5, 0.3, 0.2]], [0.5, 0.4, 0.1], "tanh"),
    ([3, 2, 5, 7], [[2, -1, 0.5], [0.5, 3, 1], [0.2, 0.3, 1], [0.5, 0.3, 0.2]], [0.5, 0.4, 0.1], "relu"),
    ([3, 2, 5, 7], [[2, -1, 0.5], [0.5, 3, 1], [0.2, 0.3, 1], [0.5, 0.3, 0.2]], [0.5, 0.4, 0.1], "sigmoid"),
    
]

# Run every case; compare_with_torch raises on the first mismatch, so
# "Passed!" is printed only for cases that complete.
for inputs, weights, biases, act_func in test_cases:
    compare_with_torch(inputs, weights, biases, act_func)
    print("Passed!")


AttributeError: 'list' object has no attribute 'dot'