In [1]:
class ActivationFunctions:
    """Activation functions operating on tensors whose ``data`` is a list of rows.

    Each public method returns a new ``Tensor`` with the activated values,
    records the input tensor in ``_prev`` and installs a ``_backward`` closure
    that accumulates the gradient of the activation into ``x.grad``.
    """

    @staticmethod
    def _ensure_grad(x):
        """Give ``x`` a zero-filled gradient shaped like ``x.data`` if it has none."""
        if x.grad is None:
            width = len(x.data[0])
            x.grad = [[0 for _ in range(width)] for _ in range(len(x.data))]

    @staticmethod
    def _stable_sigmoid(value):
        """Return the logistic sigmoid of a scalar without overflowing.

        ``math.exp(-value)`` raises ``OverflowError`` for very negative
        ``value``.  In that regime ``1 / (1 + exp(-value))`` is numerically
        indistinguishable from ``exp(value)``, which merely underflows towards
        zero.  All inputs that do not overflow use the textbook formula.
        """
        try:
            return 1 / (1 + math.exp(-value))
        except OverflowError:
            return math.exp(value)

    @staticmethod
    def _elementwise(x, forward_fn, local_grad_fn):
        """Apply ``forward_fn`` to every element of ``x`` and wire up its backward pass.

        ``local_grad_fn(value, activated)`` receives the input element and the
        already computed activation for that element and returns the local
        derivative, which is multiplied by the upstream gradient.
        """
        output = Tensor([[forward_fn(value) for value in row] for row in x.data])
        output._prev = [x]

        def _backward():
            ActivationFunctions._ensure_grad(x)
            for i, row in enumerate(x.data):
                for j, value in enumerate(row):
                    local = local_grad_fn(value, output.data[i][j])
                    x.grad[i][j] += local * output.grad[i][j]

        output._backward = _backward
        return output

    @staticmethod
    def tanh(x):
        """Element-wise hyperbolic tangent; the local derivative is ``1 - tanh(x)**2``."""
        return ActivationFunctions._elementwise(
            x,
            math.tanh,
            # Reuse the forward result instead of evaluating tanh a second time.
            lambda value, activated: 1 - activated ** 2,
        )

    @staticmethod
    def relu(x):
        """Element-wise ``max(0, x)``; the gradient passes only where ``x > 0``."""
        return ActivationFunctions._elementwise(
            x,
            lambda value: max(0, value),
            lambda value, activated: 1 if value > 0 else 0,
        )

    @staticmethod
    def sigmoid(x):
        """Element-wise logistic sigmoid; the local derivative is ``s * (1 - s)``.

        Uses an overflow-safe evaluation so that very negative inputs produce a
        value close to zero instead of raising ``OverflowError``.
        """
        return ActivationFunctions._elementwise(
            x,
            ActivationFunctions._stable_sigmoid,
            lambda value, activated: activated * (1 - activated),
        )

    @staticmethod
    def softmax(x):
        """Row-wise softmax.

        The maximum of each row is subtracted before exponentiating so the
        exponentials cannot overflow.  The backward pass applies the softmax
        Jacobian ``s_j * (delta_jk - s_k)`` of every row to the upstream
        gradient.
        """
        softmax_rows = []
        for row in x.data:
            peak = max(row)
            exps = [math.exp(value - peak) for value in row]
            total = sum(exps)
            softmax_rows.append([e / total for e in exps])

        result = Tensor(softmax_rows)
        result._prev = [x]

        def _backward():
            ActivationFunctions._ensure_grad(x)
            width = len(result.data[0])
            for i, probs in enumerate(result.data):
                for j in range(width):
                    for k in range(width):
                        jacobian = probs[j] * ((1 if j == k else 0) - probs[k])
                        x.grad[i][k] += result.grad[i][j] * jacobian

        result._backward = _backward
        return result


# Registry used to look up an activation callable by its name.
activation_functions = dict(
    tanh=ActivationFunctions.tanh,
    relu=ActivationFunctions.relu,
    sigmoid=ActivationFunctions.sigmoid,
    softmax=ActivationFunctions.softmax,
)


In [2]:
class ErrorFunction:
    """Loss functions that reduce a prediction tensor to a 1x1 loss tensor."""

    @staticmethod
    def mse(output, target):
        """Return the mean squared error between ``output`` and ``target``.

        Both arguments are tensors whose ``data`` is a list of rows.  The
        result is a 1x1 ``Tensor`` whose ``_prev`` is ``[output]`` and whose
        ``_backward`` accumulates ``2 * (output - target) / N`` into
        ``output.grad``, scaled by the gradient stored on the loss tensor.

        Raises:
            ValueError: if ``output`` is empty or the two tensors do not have
                the same number of rows and columns.
        """
        rows = len(output.data)
        cols = len(output.data[0]) if rows else 0
        count = rows * cols
        if count == 0:
            raise ValueError("Cannot compute MSE of an empty tensor.")
        # zip() would silently truncate mismatched inputs, so check up front.
        if len(target.data) != rows or any(
            len(out_row) != len(target_row)
            for out_row, target_row in zip(output.data, target.data)
        ):
            raise ValueError("output and target must have the same shape.")

        squared_diffs = [
            [(o - t) ** 2 for o, t in zip(out_row, target_row)]
            for out_row, target_row in zip(output.data, target.data)
        ]
        loss_value = sum(sum(row) for row in squared_diffs) / count

        loss_tensor = Tensor([[loss_value]])
        loss_tensor._prev = [output]

        def _backward():
            # Apply the chain rule: scale by whatever gradient reached the loss.
            # When the closure is invoked without a seeded gradient, fall back
            # to 1.0 (the loss is treated as the root of the graph).
            upstream = 1.0 if loss_tensor.grad is None else loss_tensor.grad[0][0]
            if output.grad is None:
                output.grad = [[0.0 for _ in range(cols)] for _ in range(rows)]
            for i in range(rows):
                for j in range(cols):
                    diff = output.data[i][j] - target.data[i][j]
                    output.grad[i][j] += (2 * diff) / count * upstream

        loss_tensor._backward = _backward

        return loss_tensor

# Registry used to look up a loss callable by its name.
err_functions = dict(mse=ErrorFunction.mse)


In [3]:
class Tensor:
    """Minimal list-backed tensor with reverse-mode automatic differentiation.

    ``data`` is either a list of rows (2-D) or a flat list (1-D, used for bias
    vectors).  Operations return a new ``Tensor`` that remembers its inputs in
    ``_prev`` and carries a ``_backward`` closure which accumulates gradients
    into those inputs.
    """

    def __init__(self, data):
        self.set_data(data)
        self.grad = None  # allocated lazily while gradients are propagated
        self._backward = lambda: None  # tensors not produced by an op propagate nothing
        self._prev = []

    def __repr__(self):
        return f"Tensor(data={self.data}, grad={self.grad}, prev={self._prev} shape={self.shape})"

    def set_data(self, data):
        """Replace the stored values and refresh ``shape``."""
        self.data = data
        self.determine_shape()

    def determine_shape(self):
        """Set ``shape`` to ``(rows, cols)``; a flat list of ``n`` values is ``(n, 1)``.

        Empty data yields ``(0, 0)``.
        """
        data = self.data
        if not data:
            self.shape = (0, 0)
        elif isinstance(data[0], list):
            self.shape = (len(data), len(data[0]))
        else:
            self.shape = (len(data), 1)

    @staticmethod
    def _filled_like(data, value):
        """Return a fresh list shaped like ``data`` with every entry set to ``value``."""
        if data and isinstance(data[0], list):
            width = len(data[0])
            return [[value for _ in range(width)] for _ in range(len(data))]
        return [value for _ in range(len(data))]

    def dot(self, other):
        """Matrix-multiply ``self`` (m x p) by ``other`` (p x n).

        Raises:
            ValueError: if the inner dimensions do not match.
        """
        left = self.data
        right = other.data

        m = len(left)
        p = len(right)
        # Validate before touching right[0] so a mismatch is reported as such.
        if m and len(left[0]) != p:
            raise ValueError("Incompatible dimensions for matrix multiplication.")
        n = len(right[0]) if p else 0

        result = []
        for i in range(m):
            out_row = []
            for j in range(n):
                acc = 0
                for k in range(p):
                    acc += left[i][k] * right[k][j]
                out_row.append(acc)
            result.append(out_row)
        output = Tensor(result)

        def _backward():
            if self.grad is None:
                self.grad = Tensor._filled_like(self.data, 0)
            if other.grad is None:
                other.grad = Tensor._filled_like(other.data, 0)

            # d(out)/d(self) = upstream . other^T
            for i in range(m):
                for k in range(p):
                    for j in range(n):
                        self.grad[i][k] += output.grad[i][j] * other.data[k][j]

            # d(out)/d(other) = self^T . upstream (indexed directly, no copy)
            for k in range(p):
                for j in range(n):
                    for i in range(m):
                        other.grad[k][j] += output.grad[i][j] * self.data[i][k]

        output._backward = _backward
        output._prev = [self, other]

        return output

    def broadcasted(self, m):
        """Return a tensor whose data is ``m`` stacked copies of ``self.data``."""
        # Copy per row so the stacked rows do not alias one list object.
        return Tensor([list(self.data) for _ in range(m)])

    def add(self, other):
        """Add ``other`` to ``self`` element-wise.

        ``other`` may be a 2-D tensor with the same shape as ``self`` or a 1-D
        tensor with one value per column, which is added to every row.

        Raises:
            ValueError: if ``other`` has neither of those shapes.
        """
        rows = len(self.data)
        cols = len(self.data[0]) if rows else 0
        other_is_matrix = bool(other.data) and isinstance(other.data[0], list)

        if other_is_matrix:
            if len(other.data) != rows or len(other.data[0]) != cols:
                raise ValueError("Incompatible dimensions for addition.")
            result = [
                [self.data[i][j] + other.data[i][j] for j in range(cols)]
                for i in range(rows)
            ]
        else:
            if len(other.data) != cols:
                raise ValueError("Incompatible dimensions for addition.")
            result = [
                [self.data[i][j] + other.data[j] for j in range(cols)]
                for i in range(rows)
            ]
        output = Tensor(result)

        def _backward():
            if self.grad is None:
                self.grad = Tensor._filled_like(self.data, 0)
            if other.grad is None:
                other.grad = Tensor._filled_like(other.data, 0)

            for i in range(rows):
                for j in range(cols):
                    upstream = output.grad[i][j]
                    self.grad[i][j] += upstream
                    if other_is_matrix:
                        other.grad[i][j] += upstream
                    else:
                        # A broadcast vector collects the gradient of every row.
                        other.grad[j] += upstream

        output._backward = _backward
        output._prev = [self, other]

        return output

    def step(self, lr):
        """Apply one gradient-descent update: ``data += grad * -lr``.

        Does nothing when no gradient has been accumulated or the tensor is
        empty.
        """
        if self.grad is None or not self.data:
            return
        if isinstance(self.data[0], list):
            for i in range(len(self.data)):
                for j in range(len(self.data[0])):
                    self.data[i][j] += self.grad[i][j] * -lr
        else:
            for i in range(len(self.data)):
                self.data[i] += self.grad[i] * -lr

    def transpose(self):
        """Return a new tensor holding the transpose of 2-D ``data``."""
        return Tensor([list(column) for column in zip(*self.data)])

    def backward(self):
        """Back-propagate from this tensor through everything it depends on.

        The gradient of this tensor is seeded with ones if it has not been set.
        Every tensor reachable through ``_prev`` then has its ``_backward``
        invoked exactly once, and only after all tensors that consume it, so a
        tensor used by several operations receives its complete gradient
        before passing it on.
        """
        if self.grad is None:
            self.grad = Tensor._filled_like(self.data, 1.0)

        # Iterative post-order depth-first traversal; the reverse of the
        # post-order is a topological order starting at this tensor.
        order = []
        seen = set()
        stack = [(self, False)]
        while stack:
            node, children_done = stack.pop()
            if children_done:
                order.append(node)
                continue
            if id(node) in seen:
                continue
            seen.add(id(node))
            stack.append((node, True))
            for parent in node._prev:
                if id(parent) not in seen:
                    stack.append((parent, False))

        for node in reversed(order):
            node._backward()



In [4]:
import random
import math

class DenseLayer:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, nin, nout, act_func="tanh"):
        """Create an ``nin`` x ``nout`` weight matrix and ``nout`` biases.

        Every weight row is ``1, 2, ..., nout`` and every bias is ``1``.
        ``act_func`` is accepted but not used by this class.
        """
        weight_rows = [list(range(1, nout + 1)) for _ in range(nin)]
        bias_values = [1] * nout
        self.weights = Tensor(weight_rows)
        self.biases = Tensor(bias_values)

    def set_parameters(self, weights, biases):
        """Replace both parameters with new tensors built from raw lists."""
        self.weights = Tensor(weights)
        self.biases = Tensor(biases)

    def set_biases(self, biases):
        """Replace only the bias tensor with one built from a raw list."""
        self.biases = Tensor(biases)

    def get_parameters(self):
        """Return the ``(weights, biases)`` tensors of this layer."""
        return (self.weights, self.biases)

    def forward(self, inputs):
        """Return ``inputs.dot(weights)`` with the biases added."""
        weighted = inputs.dot(self.weights)
        return weighted.add(self.biases)
        
class MLP:
    """Multi-layer perceptron: a chain of ``DenseLayer`` objects with optional activations."""

    def __init__(self, layer_sizes, activations=None):
        """Build one dense layer per consecutive pair in ``layer_sizes``.

        ``activations`` holds one entry per layer: a key of
        ``activation_functions`` or ``None`` for no activation.  Layers beyond
        the end of ``activations`` get no activation.  Omitting the argument
        means no activations at all.
        """
        # A None default avoids sharing one mutable list between all calls.
        names = [] if activations is None else activations
        self.layers = []
        self.activations = []
        for i in range(len(layer_sizes) - 1):
            self.layers.append(DenseLayer(layer_sizes[i], layer_sizes[i + 1]))
            self.activations.append(names[i] if i < len(names) else None)

    def set_parameters(self, weights, biases):
        """Assign raw weight and bias lists to the layers, in order."""
        for layer, weight, bias in zip(self.layers, weights, biases):
            layer.set_parameters(weight, bias)

    def forward(self, inputs):
        """Feed ``inputs`` through every layer and its activation, if any."""
        for idx, layer in enumerate(self.layers):
            inputs = layer.forward(inputs)
            name = self.activations[idx]
            if name:
                inputs = activation_functions[name](inputs)

        return inputs

    def get_parameters(self):
        """Return ``(all_weights, all_biases)``, each a list with one tensor per layer."""
        all_weights = []
        all_biases = []
        for layer in self.layers:
            weights, biases = layer.get_parameters()
            all_weights.append(weights)
            all_biases.append(biases)

        return all_weights, all_biases

    def step(self, lr):
        """Call ``step(lr)`` on every weight and bias tensor."""
        weights, biases = self.get_parameters()
        for param in weights + biases:
            param.step(lr)

    def zero_grad(self):
        """Reset the gradient of every weight and bias tensor to ``None``."""
        weights, biases = self.get_parameters()
        for param in weights + biases:
            param.grad = None
            
