In [1]:
from __future__ import annotations
import math
import random
from collections.abc import Iterable, Callable
from typing import List
from graphviz_plot import draw_dot

In [2]:
class Value:
    """
    A scalar node in a computation graph that supports reverse-mode autodiff.

    Every arithmetic operation returns a new ``Value`` that remembers the
    operands it was built from (``prev``), a short tag for the operation
    (``op``) and a closure (``_backward``) that pushes the node's gradient
    back to its operands. Calling ``backprop()`` on an output node fills in
    ``grad`` for every node that contributed to it.
    """

    def __init__(
        self, data, _children: Iterable | None = None, op="", label=""
    ) -> None:
        """
        Args:
            data: numeric payload of this node.
            _children: operands this node was computed from (none for leaves).
            op: tag of the operation that produced this node ("" for leaves).
            label: optional human-readable name used by ``__str__``.
        """
        self.data = data
        self._prev = set(_children) if _children else set()
        self.grad: int | float = 0
        self._op = op
        self.label = label
        # Leaves have nothing to propagate; every operation overwrites this
        # on the node it returns, and backprop() invokes it.
        self._backward: Callable = lambda: None

    def __str__(self) -> str:
        return f"Label: {self.label} | Data:{self.data:.4f} | Grad:{self.grad:.4f}"

    def __repr__(self) -> str:
        return f"Value(Data:{self.data:.4f} | Grad:{self.grad:.4f})"

    @staticmethod
    def _wrap(other) -> Value:
        """
        Return ``other`` as a ``Value``, wrapping plain ints/floats.

        Raises:
            Exception: if ``other`` is neither a ``Value`` nor an int/float.
        """
        if isinstance(other, Value):
            return other
        if isinstance(other, (int, float)):
            return Value(other)
        raise Exception("invalid type expected int or float")

    def __neg__(self):
        """Return ``-self`` (implemented as multiplication by -1)."""
        return self * -1

    def __sub__(self, other):
        """Return ``self - other`` for a ``Value`` or a plain number."""
        return self + (-other)

    def __rsub__(self, other):
        """
        Return ``other - self`` for a plain number on the left-hand side.

        Subtraction is not commutative, so the number becomes the left
        operand and ``self`` is the one that gets negated.
        """
        assert isinstance(other, (int, float))
        return Value(other) + (-self)

    def __add__(self, other: Value | int | float) -> Value:
        """
        Return ``self + other``.

        Raises:
            Exception: if ``other`` is not a ``Value``, int or float.
        """
        rhs = self._wrap(other)
        out = Value(self.data + rhs.data, _children=(self, rhs), op="+")

        def _backward():
            # local derivative times upstream derivative:
            # d(a + b)/da = d(a + b)/db = 1, upstream is out.grad
            self.grad += 1.0 * out.grad
            rhs.grad += 1.0 * out.grad

        out._backward = _backward

        return out

    def __radd__(self, val: int | float) -> Value:
        """
        Return ``val + self`` for a plain number on the left-hand side.

        Addition is commutative, so this reuses ``__add__``; the result
        therefore carries the same "+" op tag as a forward addition.
        """
        return self + Value(val)

    def __mul__(self, other: Value | int | float) -> Value:
        """
        Return ``self * other``.

        Raises:
            Exception: if ``other`` is not a ``Value``, int or float.
        """
        rhs = self._wrap(other)
        out = Value(self.data * rhs.data, _children=(self, rhs), op="*")

        def _backward():
            # local derivative times upstream derivative:
            # d(a * b)/da = b and d(a * b)/db = a, upstream is out.grad
            self.grad += rhs.data * out.grad
            rhs.grad += self.data * out.grad

        out._backward = _backward

        return out

    def __rmul__(self, val: int | float) -> Value:
        """Return ``val * self`` for a plain number on the left-hand side."""
        return self * Value(val)

    def tanh(self):
        """Return ``tanh(self)`` as a new node."""
        # math.tanh saturates to +/-1 for large |x|; building tanh from
        # math.exp(2 * x) raises OverflowError once 2 * x exceeds ~709.
        t = math.tanh(self.data)
        out = Value(t, _children=(self,), op="tanh")

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)^2, upstream is out.grad
            self.grad += (1 - t**2) * out.grad

        out._backward = _backward

        return out

    def __pow__(self, other):
        """
        Return ``self ** other`` for a constant int/float exponent.

        The exponent is treated as a constant, so only ``self`` receives a
        gradient.
        """
        assert isinstance(other, (int, float))
        val = self.data**other
        out = Value(val, _children=(self,), op=f"**{other}")

        def _backward():
            if other == 0:
                # x**0 is the constant 1; skipping the power rule also avoids
                # evaluating 0 ** -1 when self.data is 0.
                local = 0.0
            else:
                # power rule: d(x^k)/dx = k * x^(k - 1)
                local = other * (self.data ** (other - 1))
            self.grad += local * out.grad

        out._backward = _backward
        return out

    def __rpow__(self, other):
        """
        Return ``other ** self`` for a constant, strictly positive base.

        Raises:
            ValueError: if ``other`` is not strictly positive; the derivative
                ``other**x * ln(other)`` is not a real number otherwise.
        """
        assert isinstance(other, (int, float))
        if other <= 0:
            raise ValueError("base of a Value exponent must be positive")
        val = other**self.data
        out = Value(val, _children=(self,), op=f"{other}**")

        def _backward():
            # d(c^x)/dx = c^x * ln(c), upstream is out.grad
            self.grad += val * math.log(other) * out.grad

        out._backward = _backward
        return out

    def __truediv__(self, other):
        """Return ``self / other`` (implemented as ``self * other**-1``)."""
        return self * other**-1

    def __rtruediv__(self, other):
        """Return ``other / self`` for a plain number on the left-hand side."""
        assert isinstance(other, (int, float))
        return Value(other) * self**-1

    def exp(self):
        """Return ``e ** self`` as a new node."""
        x = self.data

        out = Value(math.exp(x), (self,), op="exp")

        def _backward():
            # d exp(x)/dx = exp(x), which is exactly out.data
            self.grad += out.data * out.grad

        out._backward = _backward
        return out

    @property
    def prev(self):
        """
        Getter property for protected variable _prev
        """
        return self._prev

    @property
    def op(self):
        """
        Getter for self._op
        """
        return self._op

    def backprop(self):
        """
        Backpropagate from this node through everything it depends on.

        Sets this node's ``grad`` to 1.0 and then runs each node's
        ``_backward`` closure in reverse topological order. Gradients are
        accumulated with ``+=`` and are not reset here, so callers that reuse
        nodes across calls must zero their ``grad`` beforehand.
        """
        self.grad = 1.0
        topo = []
        visited = {self}

        # Post-order depth-first walk with an explicit stack: a recursive walk
        # exceeds Python's recursion limit on long chains such as the running
        # total built by sum() over many terms.
        stack = [(self, iter(self._prev))]
        while stack:
            node, children = stack[-1]
            for child in children:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # every operand of `node` is already emitted
                topo.append(node)
                stack.pop()

        for node in reversed(topo):
            node._backward()

In [3]:
# Plain number on the left-hand side, so this addition is dispatched to Value.__radd__.
2 + Value(2)

Value(Data:4.0000 | Grad:0.0000)

In [4]:
class Neuron:
    """A single unit computing ``tanh(sum(w_i * x_i) + b)``."""

    def __init__(self, nin) -> None:
        """Create ``nin`` weights and one bias, each drawn uniformly from [-1, 1]."""
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1, 1)))
        self.w = weights
        self.b = Value(random.uniform(-1, 1), label='bias')

    def __call__(self, x) -> Value:
        """Return the activation of this neuron for the inputs ``x``."""
        # Running total starts at the int 0, exactly like the built-in sum().
        total = 0
        for weight, value in zip(self.w, x):
            total = total + weight * value
        pre_activation = total + self.b
        return pre_activation.tanh()

    def parameters(self) -> List[Value]:
        """Return the trainable values: all weights followed by the bias."""
        return [*self.w, self.b]

class Layer:
    """A group of neurons that all receive the same inputs."""

    def __init__(self, nin, nout) -> None:
        """
        Args:
            nin: number of inputs each neuron receives (size of the previous layer).
            nout: number of neurons in this layer.
        """
        neurons = []
        for _ in range(nout):
            neurons.append(Neuron(nin))
        self.neurons = neurons

    def __call__(self, x) -> List[Value]|Value:
        """Feed ``x`` to every neuron; a single-neuron layer returns a bare Value."""
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron(x))
        if len(outputs) > 1:
            return outputs
        return outputs[-1]

    def __len__(self) -> int:
        """Number of neurons in the layer."""
        return len(self.neurons)

    def parameters(self):
        """Return the parameters of every neuron as one flat list."""
        collected = []
        for neuron in self.neurons:
            collected.extend(neuron.parameters())
        return collected

class MLP:
    """A multi-layer perceptron: a chain of ``Layer`` objects applied in order."""

    def __init__(self, nin: int, nouts: Iterable[int]) -> None:
        """
        Args:
            nin: number of inputs to the first layer.
            nouts: number of neurons in each successive layer. Any iterable
                of ints is accepted (list, tuple, ...).
        """
        self.nin = nin
        # Unpacking instead of `[nin] + nouts` so a tuple of sizes works too.
        sizes = [nin, *nouts]
        # Each layer takes the previous size as its input count.
        self.layers = [
            Layer(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])
        ]

    def __call__(self, x) -> List[Value]|Value:
        """Run ``x`` through every layer and return the last layer's output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def describe(self):
        """Print the 1-based index and neuron count of every layer."""
        for i, layer in enumerate(self.layers):
            print(f'layer:{i+1}, neurons:{len(layer)}')

    def parameters(self):
        """Return the parameters of every layer as one flat list."""
        return [p for layer in self.layers for p in layer.parameters()]



In [22]:
# 3 inputs feeding layers of 4, 4 and 1 neurons; train() reads this model through the global `n`.
n = MLP(3, [4, 4, 1])

In [23]:
# Four training samples with three features each (matches the 3-input MLP).
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]
# Target output for each row of xs, in the same order.
ys = [1.0, -1.0, -1.0, 1.0]


In [33]:
def train(epoch, learning_rate:int|float):
    """
    Run ``epoch`` steps of plain gradient descent on the global model ``n``.

    Each step does a forward pass over the global ``xs``, computes the summed
    squared error against the global ``ys``, prints it, zeroes the parameter
    gradients, backpropagates the loss and moves every parameter against its
    gradient by ``learning_rate``.
    """
    for i in range(epoch):
        # forward pass over the whole dataset
        predictions = []
        for sample in xs:
            predictions.append(n(sample))

        # summed squared error between targets and predictions
        loss = sum(
            (target - predicted)**2
            for predicted, target in zip(predictions, ys)
        )
        print(f'{i+1}: Loss:{loss.data:.4f}')

        # gradients accumulate, so clear them before backpropagating
        for param in n.parameters():
            param.grad = 0.0

        # backpropagate the loss
        loss.backprop()

        # gradient-descent step
        for param in n.parameters():
            param.data = param.data - learning_rate * param.grad
        

In [34]:
# Forward pass over every sample in xs; the resulting list is shown as the cell output.
ypred = [n(x) for x in xs]
ypred

[Value(Data:0.8749 | Grad:0.0000),
 Value(Data:-0.8253 | Grad:0.0000),
 Value(Data:-0.8208 | Grad:0.0000),
 Value(Data:0.8269 | Grad:0.0000)]

In [49]:
# 20 gradient-descent steps with a learning rate of 0.01.
# NOTE(review): the recorded loss already starts near 0.02 and the execution
# counts jump, so this cell appears to have been run several times on the same
# model; no random seed is set either, so a fresh kernel will print different numbers.
train(20, 0.01)

1: Loss:0.0197
2: Loss:0.0196
3: Loss:0.0194
4: Loss:0.0193
5: Loss:0.0192
6: Loss:0.0191
7: Loss:0.0189
8: Loss:0.0188
9: Loss:0.0187
10: Loss:0.0186
11: Loss:0.0184
12: Loss:0.0183
13: Loss:0.0182
14: Loss:0.0181
15: Loss:0.0180
16: Loss:0.0179
17: Loss:0.0178
18: Loss:0.0176
19: Loss:0.0175
20: Loss:0.0174


In [50]:
# Forward pass again, now that the train() call above has updated the parameters.
ypred = [n(x) for x in xs]
ypred

[Value(Data:0.9441 | Grad:0.0000),
 Value(Data:-0.9301 | Grad:0.0000),
 Value(Data:-0.9371 | Grad:0.0000),
 Value(Data:0.9268 | Grad:0.0000)]