In [169]:
class ActivationFunctions:
    @staticmethod
    def tanh(x):
        result = []
        for row in x.data:
            tanh_row = [math.tanh(_x) for _x in row]
            result.append(tanh_row)
        return Tensor(result)


activation_functions = {
    "tanh": ActivationFunctions.tanh,
}


In [170]:
class ErrorFunction:
    @staticmethod
    def mse(output, target):
        if output.shape != target.shape or not isinstance(target.data[0], list):
            target = target.broadcast(output.shape[0])
        print(output,target)
        error_sum = 0.0
        for i in range(len(output.data)):
            for j in range(len(output.data[0])):
                error_sum += (output.data[i][j] - target.data[i][j]) ** 2
        
        return error_sum / (output.shape[0] * output.shape[1])
    
    @staticmethod
    def mse_prime(output, target):
        """Calculates the gradient of MSE with respect to the output."""
        return [(o - t) for o, t in zip(output, target)]

err_functions = {
    "mse": ErrorFunction.mse,
    "mse_prime": ErrorFunction.mse_prime
}

In [174]:
class Tensor:
    def __init__(self, data):
        self.data = data
        if isinstance(data[0], list):
            rows = len(data)
            cols = len(data[0]) if rows > 0 else 0
            self.shape = (rows, cols)
        else:
            self.shape = (len(data), 1)

    def __repr__(self):
        return f"Tensor(data={self.data}, shape={self.shape})"

    def dot(self, other):
        v1 = self.data  
        v2 = other.data 
        
        m = len(v1)      
        n = len(v2[0])  
        p = len(v2)    
    
        if len(v1[0]) != len(v2):
            raise ValueError("Incompatible dimensions for matrix multiplication.")
        
        result = [[0 for _ in range(n)] for _ in range(m)]
    
        for i in range(m):
            for j in range(n):
                for k in range(p):
                    result[i][j] += v1[i][k] * v2[k][j]        
        output = Tensor(result)
        
        def _backward():
            if True or self.requires_grad:
                self.grad = [[0 for _ in range(len(self.data[0]))] for _ in range(len(self.data))]
                other_T = other.transpose()
                for i in range(m):
                    for k in range(p):
                        for j in range(n):
                            self.grad[i][k] += out.grad[i][j] * other_T.data[j][k]
            
            if True or other.requires_grad:
                other.grad = [[0 for _ in range(len(other.data[0]))] for _ in range(len(other.data))]
                self_T = self.transpose()
                for k in range(p):
                    for i in range(m):
                        for j in range(n):
                            other.grad[k][j] += output.grad[i][j] * self_T.data[k][i]
            
        output._backward = _backward
        output._prev = [self, other]
        
        return output
    
    def broadcast(self, m):
        result = []
        for _ in range(m):
            result.append(self.data)
        return Tensor(result)

    def add(self, other):
        if self.shape != other.shape or not isinstance(other.data[0], list):
            other = other.broadcast(self.shape[0])
        
        result = []
        for i in range(len(self.data)):
            row = []
            for j in range(len(self.data[0])):
                row.append(self.data[i][j] + other.data[i][j])
            result.append(row)
        output = Tensor(result)
        def _backward():
            if True or self.requires_grad:
                self.grad = [[self.grad[i][j] + out.grad[i][j] for j in range(len(self.data[0]))] for i in range(len(self.data))]
            if True or other.requires_grad:
                other.grad = [[other.grad[i][j] + out.grad[i][j] for j in range(len(other.data[0]))] for i in range(len(other.data))]

        output._backward = _backward
        output._prev = [self, other]
        return output
        

    def transpose(self):
        transposed_data = [[self.data[j][i] for j in range(self.shape[0])]
                           for i in range(self.shape[1])]
        return Tensor(transposed_data)
    
    def backward(self):
        if self.grad is None:
            self.grad = [[1.0 for _ in range(len(self.data[0]))] for _ in range(len(self.data))]
            self._backward()
        for prev_tensor in self._prev:
        prev_tensor.backward()


In [177]:
import random
import math

class DenseLayer:
    def __init__(self,nin,nout,act_func = "tanh"):
        self.weights = Tensor([[i + 1 for i in range(nout)] for _ in range(nin)])
        self.biases = Tensor([1 for _ in range(nout)])
        self.act_func = None if act_func == "" else activation_functions[act_func]

    def forward(self, inputs):
        return self.act_func(inputs.dot(self.weights).add(self.biases))
    
class MLP:
    def __init__(self, layer_sizes, err_func="mse"):
        self.layers = []
        self.err_func = err_functions[err_func]
        for i in range(len(layer_sizes) - 1):
            layer = DenseLayer(layer_sizes[i], layer_sizes[i + 1])
            self.layers.append(layer)

    def forward(self, inputs):
        for layer in self.layers:
            inputs = layer.forward(inputs)
        return inputs
    
    def train(self,x,y, batch_size):
        num_samples = len(x)
        for i in range(0, num_samples, batch_size):
            x_batch = Tensor(x[i:i + batch_size])
            y_batch = Tensor(y[i:i + batch_size])
            
            output = self.forward(x_batch)
            loss = self.err_func(output, y_batch)
            
            return loss
            
            d_output = self.loss_derivative(output, y_batch)
            self.backward(d_output, learning_rate)
        


layer_sizes = [3,2,1]  

mlp = MLP(layer_sizes)

xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0]
mlp.train(xs,ys,1)




Tensor(data=[[0.9950538575111004]], shape=(1, 1)) Tensor(data=[[1.0]], shape=(1, 1))


2.4464325520497674e-05

In [139]:
import torch
def compare_with_torch(inputs, weights, biases,act_func):
    layer = DenseLayer(len(inputs),len(biases),act_func)

    layer.set_weights(weights)
    layer.set_biases(biases)
    output = layer.forward(inputs)

    x = torch.tensor(inputs, dtype=torch.double, requires_grad=True)
    w = torch.tensor(weights, dtype=torch.double, requires_grad=True)
    b = torch.tensor(biases, dtype=torch.double, requires_grad=True)
    print(x)

    n = torch.matmul(w.T, x) + b

    expected_output = getattr(torch, act_func)(n)
    
    assert torch.allclose(expected_output, torch.tensor([output], dtype=torch.float64), atol=1e-6)

    expected_output.sum().backward()
    expected_w_grad = w.grad.detach()
    expected_b_grad = b.grad.detach()
    expected_x_grad = x.grad.detach()

    (w_grad,b_grad,x_grad) = layer.backward()
    assert torch.allclose(expected_w_grad, torch.tensor([w_grad], dtype=torch.float64), atol=1e-6)
    assert torch.allclose(expected_b_grad, torch.tensor([b_grad], dtype=torch.float64), atol=1e-6)
    assert torch.allclose(expected_x_grad, torch.tensor([x_grad], dtype=torch.float64), atol=1e-6)



In [28]:
test_cases = [
    ([2, 0], [[-3], [1]], [6.8813735870195432], "tanh"),
    ([3, 2], [[2, -1], [0.5, 3]], [1, -2], "relu"),
    ([3, 2, 5, 7], [[2, -1, 0.5], [0.5, 3, 1], [0.2, 0.3, 1], [0.5, 0.3, 0.2]], [0.5, 0.4, 0.1], "tanh"),
    ([3, 2, 5, 7], [[2, -1, 0.5], [0.5, 3, 1], [0.2, 0.3, 1], [0.5, 0.3, 0.2]], [0.5, 0.4, 0.1], "relu"),
    ([3, 2, 5, 7], [[2, -1, 0.5], [0.5, 3, 1], [0.2, 0.3, 1], [0.5, 0.3, 0.2]], [0.5, 0.4, 0.1], "sigmoid"),
    
]

for inputs, weights, biases, act_func in test_cases:
    compare_with_torch(inputs, weights, biases, act_func)
    print("Passed!")


tensor([2., 0.], dtype=torch.float64, requires_grad=True)
Passed!
tensor([3., 2.], dtype=torch.float64, requires_grad=True)
Passed!
tensor([3., 2., 5., 7.], dtype=torch.float64, requires_grad=True)
Passed!
tensor([3., 2., 5., 7.], dtype=torch.float64, requires_grad=True)
Passed!
tensor([3., 2., 5., 7.], dtype=torch.float64, requires_grad=True)
Passed!
