In [1]:
import math
import queue
import random

In [2]:
 class Value:
        def __init__(self, value, parents=[], op=None, pow=None):
            self.grad = 0
            self.value = value
            self.parents = parents
            self.op = op
            self.pow = pow
        
        def __repr__(self):
             return f"Value(data={self.value}, grad={self.grad})"
        
        def __add__(self,other):
            other = other if isinstance(other, Value) else Value(other)
            return Value(value=self.value+other.value, parents=[self, other], op='+')
                         
        def __mul__(self,other):
            other = other if isinstance(other, Value) else Value(other)
            return Value(value=self.value*other.value, parents=[self, other], op='*')

        def __pow__(self, other):
            return Value(self.value**other, [self], '**', pow=other)
        
        def __truediv__(self, other):
            return self * other**-1
        
        def __sub__(self, other):
            return self + (-other)

        def __rsub__(self, other): # other - self
            return other + (-self)
        
        def __rmul__(self, other): # other * self
            return self * other

        def __radd__(self, other): # other + self
            return self + other   
        
        def __neg__(self):
            return self * -1
        
        def tanh(self):
            return Value(value=math.tanh(self.value), parents=[self], op='tanh')
        
        def relu(self):
             return Value(0 if self.value < 0 else self.value, parents=[self], op='ReLU')
        
        def compute_parents_grad(self):
            match self.op:
                case '**':
                    self.parents[0].grad += self.pow * self.grad * self.parents[0].value **(self.pow - 1)
                case '*':
                    self.parents[0].grad += self.grad * self.parents[1].value
                    self.parents[1].grad += self.grad * self.parents[0].value
                case '+':
                    self.parents[0].grad += self.grad
                    self.parents[1].grad += self.grad                 
                case 'ReLu':
                    self.parents[0].grad += self.grad * (parents[0].value > 0)
                case 'tanh':
                    self.parents[0].grad +=  self.grad * (1 - math.tanh(self.parents[0].value)**2)
                    
                    
        def backpropagate(self):
            q = queue.Queue()
            
            def bfs(v):
                q.put(v)
                while not q.empty():
                    top = q.get()
                    top.compute_parents_grad()
                    for p in top.parents:
                        q.put(p)
                        
            self.grad = 1
            bfs(self)     

In [3]:
class Module:
    """Base class for objects that expose a list of trainable parameters."""

    def parameters(self):
        """Return the parameters owned by this module; the base class owns none."""
        return list()

    def zero_grad(self):
        """Set ``grad`` to 0 on every object returned by ``parameters()``."""
        for param in self.parameters():
            setattr(param, "grad", 0)

In [4]:
class Neuron(Module):
    """A single unit: weighted sum of the inputs plus a bias, optionally passed through ReLU."""

    def __init__(self, nr_in, nonlin=True):
        """Create ``nr_in`` weights drawn uniformly from [-1, 1] and a zero bias.

        Args:
            nr_in: number of inputs, i.e. number of weights to create.
            nonlin: when true, ``__call__`` applies ``relu()`` to the activation.
        """
        self.weights = [Value(random.uniform(-1, 1)) for _ in range(nr_in)]
        self.bias = Value(0)
        self.nonlin = nonlin

    def __call__(self, x):
        """Return ``sum(w_i * x_i) + bias`` for the inputs ``x``, with ReLU if ``nonlin`` is set."""
        # The bias is the start value of the sum, so the result is always a Value.
        act = sum((wi * xi for wi, xi in zip(self.weights, x)), self.bias)
        return act.relu() if self.nonlin else act

    def parameters(self):
        """Return the weights followed by the bias, as a new list."""
        return self.weights + [self.bias]

    def __repr__(self):
        # The weights live in ``self.weights``; there is no ``self.w`` attribute.
        return f"{'ReLU' if self.nonlin else 'Linear'}Neuron({len(self.weights)})"

In [5]:
class Layer(Module):
    """A group of neurons that all receive the same input."""

    def __init__(self, nr_in, nr_out, **kwargs):
        """Build ``nr_out`` neurons with ``nr_in`` inputs each; ``kwargs`` go to ``Neuron``."""
        units = []
        for _ in range(nr_out):
            units.append(Neuron(nr_in, **kwargs))
        self.neurons = units

    def __call__(self, x):
        """Evaluate every neuron on ``x``; a one-neuron layer returns the bare result, not a list."""
        results = []
        for unit in self.neurons:
            results.append(unit(x))
        if len(results) == 1:
            return results[0]
        return results

    def parameters(self):
        """Return the parameters of all neurons, concatenated in neuron order."""
        collected = []
        for unit in self.neurons:
            collected.extend(unit.parameters())
        return collected

    def __repr__(self):
        described = ', '.join(str(n) for n in self.neurons)
        return f"Layer of [{described}]"

In [6]:
class MLP(Module):
    """A stack of layers applied one after another."""

    def __init__(self, nin, nouts):
        """Build one layer per entry of ``nouts``.

        Args:
            nin: number of inputs to the first layer.
            nouts: list of layer output sizes; each size is the next layer's
                input size. Every layer except the last is created with
                ``nonlin=True``.
        """
        widths = [nin] + nouts
        final_index = len(nouts) - 1
        stacked = []
        for idx, (fan_in, fan_out) in enumerate(zip(widths, widths[1:])):
            stacked.append(Layer(fan_in, fan_out, nonlin=(idx != final_index)))
        self.layers = stacked

    def __call__(self, x):
        """Feed ``x`` through the layers in order and return the last layer's output."""
        signal = x
        for stage in self.layers:
            signal = stage(signal)
        return signal

    def parameters(self):
        """Return the parameters of all layers, concatenated in layer order."""
        collected = []
        for stage in self.layers:
            collected.extend(stage.parameters())
        return collected

    def __repr__(self):
        described = ', '.join(str(layer) for layer in self.layers)
        return f"MLP of [{described}]"

In [7]:
# Seed the RNG before building the network: the initial weights are drawn with
# random.uniform, so without a fixed seed every Restart & Run All trains a
# different model.
random.seed(42)

# 3 inputs -> two hidden layers of 4 neurons -> 1 output.
n = MLP(3, [4, 4, 1])

# Four training samples with three features each, and one target per sample.
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]
ys = [1.0, -1.0, -1.0, 1.0]



In [8]:
EPOCHS = 80           # number of gradient-descent steps
LEARNING_RATE = 0.05  # step size applied to every parameter

for k in range(EPOCHS):

    # forward pass: predictions for every sample and the summed squared error
    ypred = [n(x) for x in xs]
    loss = sum((yout - ygt) ** 2 for ygt, yout in zip(ys, ypred))

    # backward pass: gradients accumulate with +=, so the parameters must be
    # cleared before each new backpropagation
    n.zero_grad()
    loss.backpropagate()

    # update: move each parameter against its gradient
    for p in n.parameters():
        p.value -= LEARNING_RATE * p.grad

    print(k, loss.value)
    



0 4.474658113343049
1 4.333316313094321
2 4.21258901565381
3 4.102751975952989
4 4.0002602340083575
5 3.9037046055524143
6 3.8124204732798024
7 3.7260087909295527
8 3.6441708449552688
9 3.5666509506505575
10 3.493216294321775
11 3.4236495745261726
12 3.357746067670697
13 3.295312236543194
14 3.236164889666589
15 3.1801305492034277
16 3.127044908687212
17 3.076752338793565
18 3.02910542587483
19 2.9839645371470205
20 2.9411974096154223
21 2.900678760967685
22 2.862289921096722
23 2.825918483103329
24 2.7914579727293125
25 2.7588075352411643
26 2.7278716388407656
27 2.698559793730004
28 2.670786286002817
29 2.644469925582001
30 2.6195338074594865
31 2.595905085537939
32 2.573514758408603
33 2.5522974664354114
34 2.532191299548649
35 2.51313761518291
36 2.4950808658239887
37 2.4779684356575262
38 2.4617504858390626
39 2.446379807930459
40 2.4318116850716853
41 2.4180037604797264
42 2.404915912887867
43 2.392510138559091
44 2.3807504395265875
45 2.3696027177327417
46 2.3590346747552715
47 