In [92]:
import math
import numpy as np
import matplotlib.pyplot as plt

### This is sort of a Neuron. With its own value, children, operations, label and gradient

In [93]:
# Neuron
class Value:
    def __init__(self, data, _children=(), _op='', label=''):
        """
        Initialize a Value object.
            :param 
                data: The numerical value.
                _children: A tuple of child Value objects that this Value depends on.
                _op: The operation that produced this Value (e.g., '+', '*').
                label: A label for this Value, useful for debugging.
        """ 
        self.data = data
        self._prev = set(_children)
        self._op = _op
        self.label = label
        self.grad = 0.0
        self._backward = lambda: None
    
    def __repr__(self):
        """
        String representation of the Value object.
        Returns a string that includes the data and label of the Value.
        """
        return f"Value(data={self.data}, label='{self.label}', grad={self.grad}, op='{self._op}', backward={self._backward})"
    
    def __add__(self, other):
        """
        Add two Value objects.
        Returns a new Value object that represents the sum of the two.
        """
        if not isinstance(other, Value):
            other = Value(other)  # Convert to Value if not already
        out = Value(self.data + other.data, (self, other), '+')
        
        def _backward():
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward

        return out

    def __neg__(self):
        return self * -1  # Negation can be treated as multiplication by -1
    
    def __sub__(self, other):
        return self + (-other)  # Subtraction can be treated as addition of the negation

    def __pow__(self, other):
        """
        Raise a Value object to the power of another.
        Returns a new Value object that represents the result of the exponentiation.
        """
        if not isinstance(other, (int, float)):
            return "Must be an integer or float"  # Only support power with numbers, not other Value objects
        out = Value(self.data ** other, (self, other), '**')
        def backward():
            self.grad += other * (self.data ** (other - 1)) * out.grad

        out._backward = backward

        return out

    def __mul__(self, other):
        """
        Multiply two Value objects.
        Returns a new Value object that represents the product of the two.
        """
        if not isinstance(other, Value):
            other = Value(other)  # Convert to Value if not already
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out
    
    def __rmul__(self, other):
        """
        Python doesnt know how to multiply (non_value * Value) and instead of throwing an error,
            Python will see by this method if Value knows how to handle it.
        """
        return self * other # We just reversed the order of multiplication and it will call __mul__
    
    def __truediv__(self, other):
        return self * other**-1  # Use the power method for division
    
    def tanh(self):
        """
        Apply the hyperbolic tangent function to the Value.
        Returns a new Value object that represents the tanh of the original Value.
        """
        x = self.data
        tanh = (math.exp(2*x) - 1) / (math.exp(2*x) + 1)
        out = Value(tanh, (self,), 'tanh')

        def _backward():
            self.grad += (1- tanh**2) * out.grad
        out._backward = _backward

        return out

    def exp(self):
        """
        Apply the exponential function to the Value.
        Returns a new Value object that represents the exponential of the original Value.
        """
        x = self.data
        out = Value(math.exp(x), (self,), 'exp')

        def _backward():
            self.grad += out.data * out.grad
        out._backward = _backward

        return out

    def backward(self):
        """
        Perform backpropagation to compute gradients.
        This method traverses the computational graph in reverse order,
        applying the chain rule to compute gradients for each Value.
        """
        topo = []
        visited = set()

        def build_topo(value):
            if value not in visited:
                visited.add(value)
                for child in value._prev:
                    build_topo(child)
                topo.append(value)

        build_topo(self)
        self.grad = 1.0
        for value in reversed(topo):
            value._backward()

In [94]:
class Neuron:
    def __init__(self, nin):
        """
        Initialize a Neuron object.
            :param nin: The number of inputs to the neuron.
        """
        self.w = [Value(np.random.randn(), label=f'w{i+1}') for i in range(nin)]
        self.b = Value(np.random.randn(), label='b')
    
    def __call__(self, x): # Call the neuron with input x, for ex. Neuron([1.0, 2.0, 3.0])
        """
        Call the neuron with input x.
            :param x: A list of input values.
        """
        activation = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)  # Weighted sum of inputs plus bias
        return activation.tanh() # Non linear activation function (tanh in this case)

class Layer:
    def __init__(self, nin, nout):
        """
        Initialize a Layer object.
            :param nin: The number of inputs to the layer. (How many neurons in the previous layer)
            :param nout: The number of outputs from the layer. (How many neurons in this layer)
        """
        self.neurons = [Neuron(nin) for _ in range(nout)]
    
    def __call__(self, x):
        """
        Call the layer with input x.
            :param x: A list of input values.
        """
        return [neuron(x) for neuron in self.neurons]  # Call each neuron in the layer with input x
    
class MLP:
    def __init__(self, nin, nouts):
        """
        Initialize a Multi-Layer Perceptron (MLP) object.
            :param nin: The number of inputs to the MLP.
            :param nouts: A list of integers representing the number of neurons in each layer.
        """
        sz = [nin] + nouts  # Create a list of sizes for each layer
        self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(sz) - 1)]  # Create layers based on the sizes
    
    def __call__(self, x):
        """
        Call the MLP with input x.
            :param x: A list of input values.
        """
        for layer in self.layers:
            x = layer(x)  # Pass input through each layer
        return x

In [95]:
def print_tree(value, indent=0):
    prefix = " " * indent
    print(f"{prefix}{value.label or '[op:'+value._op+']'}: {value.data:.2f} (grad: {value.grad:.2f})")
    for child in value._prev:
        print_tree(child, indent + 6)

In [96]:
x = [Value(3.0), Value(4.0), Value(5.0)]  # Example input
mlp = MLP(3, [4, 4, 1])  # Create an MLP with 3 inputs, 4 neurons in the first layer, and 5 in the second
output = mlp(x)  # Call the MLP with input x
output[0]

Value(data=-0.8210812192218304, label='', grad=0.0, op='tanh', backward=<function Value.tanh.<locals>._backward at 0x000002101AD2C4A0>)

In [97]:
output[0].backward()  # Perform backpropagation to compute gradients
print_tree(output[0])  # Print the computational graph of the output

[op:tanh]: -0.82 (grad: 1.00)
      [op:+]: -1.16 (grad: 0.33)
            [op:*]: 0.15 (grad: 0.33)
                  w4: 0.16 (grad: 0.32)
                  [op:tanh]: 0.98 (grad: 0.05)
                        [op:+]: 2.38 (grad: 0.00)
                              [op:+]: 0.61 (grad: 0.00)
                                    [op:+]: 1.45 (grad: 0.00)
                                          [op:*]: 0.95 (grad: 0.00)
                                                [op:tanh]: 1.00 (grad: 0.37)
                                                      [op:+]: 8.20 (grad: 0.00)
                                                            [op:*]: 2.22 (grad: 0.00)
                                                                  [op:]: 5.00 (grad: -0.01)
                                                                  w3: 0.44 (grad: 0.00)
                                                            [op:+]: 5.98 (grad: 0.00)
                                                                  [