In [690]:
import math
import queue
import random

In [829]:
 class Value:
        def __init__(self, value, parents=[], op=None, pow=None):
            self.grad = 0
            self.value = value
            self.parents = parents
            self.op = op
            self.pow = pow
        
        def __repr__(self):
             return f"Value(data={self.value}, grad={self.grad})"
        
        def __add__(self,other):
            other = other if isinstance(other, Value) else Value(other)
            return Value(value=self.value+other.value, parents=[self, other], op='+')
                         
        def __mul__(self,other):
            other = other if isinstance(other, Value) else Value(other)
            return Value(value=self.value*other.value, parents=[self, other], op='*')

        def __pow__(self, other):
            return Value(self.value**other, [self], '**', pow=other)
        
        def __truediv__(self, other):
            return self * other**-1
        
        def __sub__(self, other):
            return self + (-other)

        def __rsub__(self, other): # other - self
            return other + (-self)
        
        def __rmul__(self, other): # other * self
            return self * other

        def __radd__(self, other): # other + self
            return self + other   
        
        def __neg__(self):
            return self * -1
        
        def tanh(self):
            return Value(value=math.tanh(self.value), parents=[self], op='tanh')
        
        def relu(self):
             return Value(0 if self.value < 0 else self.value, parents=[self], op='ReLU')
        
        def compute_parents_grad(self):
            match self.op:
                case '**':
                    self.parents[0].grad += self.pow * self.grad * self.parents[0].value **(self.pow - 1)
                case '*':
                    self.parents[0].grad += self.grad * self.parents[1].value
                    self.parents[1].grad += self.grad * self.parents[0].value
                case '+':
                    self.parents[0].grad += self.grad
                    self.parents[1].grad += self.grad                 
                case 'ReLu':
                    self.parents[0].grad += self.grad * (parents[0].value > 0)
                case 'tanh':
                    self.parents[0].grad +=  self.grad * (1 - math.tanh(self.parents[0].value)**2)
                    
                    
        def backpropagate(self):
            q = queue.Queue()
            
            def bfs(v):
                q.put(v)
                while not q.empty():
                    top = q.get()
                    top.compute_parents_grad()
                    for p in top.parents:
                        q.put(p)
                        
            self.grad = 1
            bfs(self)     

In [830]:
class Module:
    """Base class for objects that expose a list of trainable parameters."""

    def parameters(self):
        """Return the trainable parameters; the base class has none."""
        return list()

    def zero_grad(self):
        """Set ``grad`` to 0 on every object returned by ``parameters()``."""
        for param in self.parameters():
            param.grad = 0

In [831]:
class Neuron(Module):
    """Single unit computing ``sum(w_i * x_i) + bias``, optionally through ReLU."""

    def __init__(self, nr_in, nonlin =True):
        """Create a neuron.

        Args:
            nr_in: Number of inputs; one weight is drawn from
                ``random.uniform(-1, 1)`` for each.
            nonlin: When true, the output is passed through ``relu()``.
        """
        self.weights = [Value(random.uniform(-1,1)) for _ in range(nr_in)]
        self.bias = Value(0)
        self.nonlin = nonlin

    def __call__(self, x):
        """Return the activation for the input sequence ``x``."""
        # Read the attributes that __init__ actually defines (weights / bias).
        act = sum((wi*xi for wi,xi in zip(self.weights, x)), self.bias)
        return act.relu() if self.nonlin else act

    def parameters(self):
        """Return the weights followed by the bias."""
        return self.weights + [self.bias]

    def __repr__(self):
        return f"{'ReLU' if self.nonlin else 'Linear'}Neuron({len(self.weights)})"

In [832]:
class Layer(Module):
    """A group of neurons that all receive the same input."""

    def __init__(self, nr_in, nr_out, **kwargs):
        """Build ``nr_out`` neurons with ``nr_in`` inputs each.

        Extra keyword arguments are forwarded to every ``Neuron``.
        """
        self.neurons = []
        for _ in range(nr_out):
            self.neurons.append(Neuron(nr_in, **kwargs))

    def __call__(self, x):
        """Evaluate every neuron on ``x``.

        Returns the bare output when the layer has exactly one neuron,
        otherwise the list of outputs.
        """
        outputs = []
        for neuron in self.neurons:
            outputs.append(neuron(x))
        if len(outputs) == 1:
            return outputs[0]
        return outputs

    def parameters(self):
        """Return the parameters of all neurons as one flat list."""
        collected = []
        for neuron in self.neurons:
            for param in neuron.parameters():
                collected.append(param)
        return collected

    def __repr__(self):
        return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"

In [833]:
class MLP(Module):
    """Stack of layers applied one after another."""

    def __init__(self, nin, nouts):
        """Build the network.

        Args:
            nin: Number of inputs to the first layer.
            nouts: List with the number of neurons of each layer. Every layer
                except the last is created with ``nonlin=True``.
        """
        sz = [nin] + nouts
        last_index = len(nouts) - 1
        self.layers = []
        for i in range(len(nouts)):
            is_hidden = i != last_index
            self.layers.append(Layer(sz[i], sz[i+1], nonlin=is_hidden))

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the result."""
        activation = x
        for layer in self.layers:
            activation = layer(activation)
        return activation

    def parameters(self):
        """Return the parameters of all layers as one flat list."""
        collected = []
        for layer in self.layers:
            collected.extend(layer.parameters())
        return collected

    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

In [834]:
class Module:
    """Common base for anything that owns trainable parameters."""

    def parameters(self):
        """Return an empty list; subclasses override this."""
        no_parameters = []
        return no_parameters

    def zero_grad(self):
        """Reset the ``grad`` attribute of each parameter to 0."""
        params = self.parameters()
        for item in params:
            item.grad = 0

class Neuron(Module):
    """Single unit: weighted sum of the inputs plus a bias, optionally ReLU'd."""

    def __init__(self, nin, nonlin=True):
        """Draw ``nin`` weights from ``random.uniform(-1, 1)``; bias starts at 0."""
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1,1)))
        self.w = weights
        self.b = Value(0)
        self.nonlin = nonlin

    def __call__(self, x):
        """Return the activation for the input sequence ``x``."""
        # Same left-to-right accumulation as sum(..., self.b).
        total = self.b
        for weight, feature in zip(self.w, x):
            total = total + weight*feature
        if self.nonlin:
            return total.relu()
        return total

    def parameters(self):
        """Return the weights followed by the bias."""
        return [*self.w, self.b]

    def __repr__(self):
        return f"{'ReLU' if self.nonlin else 'Linear'}Neuron({len(self.w)})"

class Layer(Module):
    """A group of neurons that all receive the same input."""

    def __init__(self, nin, nout, **kwargs):
        """Build ``nout`` neurons with ``nin`` inputs each.

        Extra keyword arguments are forwarded to every ``Neuron``.
        """
        neurons = []
        for _ in range(nout):
            neurons.append(Neuron(nin, **kwargs))
        self.neurons = neurons

    def __call__(self, x):
        """Evaluate every neuron on ``x``.

        Returns the bare output when there is exactly one neuron, otherwise
        the list of outputs.
        """
        results = []
        for unit in self.neurons:
            results.append(unit(x))
        if len(results) == 1:
            return results[0]
        return results

    def parameters(self):
        """Return the parameters of all neurons as one flat list."""
        flat = []
        for unit in self.neurons:
            flat.extend(unit.parameters())
        return flat

    def __repr__(self):
        return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"

class MLP(Module):
    """Stack of layers applied one after another."""

    def __init__(self, nin, nouts):
        """Build the network.

        Args:
            nin: Number of inputs to the first layer.
            nouts: List with the number of neurons of each layer. Every layer
                except the last is created with ``nonlin=True``.
        """
        sz = [nin] + nouts
        depth = len(nouts)
        layers = []
        for i in range(depth):
            layers.append(Layer(sz[i], sz[i+1], nonlin=(i != depth - 1)))
        self.layers = layers

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the result."""
        current = x
        for stage in self.layers:
            current = stage(current)
        return current

    def parameters(self):
        """Return the parameters of all layers as one flat list."""
        flat = []
        for stage in self.layers:
            for param in stage.parameters():
                flat.append(param)
        return flat

    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

Value(data=-1.7882574558440452, grad=0)

In [835]:
# Toy data set: four 3-feature inputs, each paired with a target of +1 or -1.
xs = [[2.0, 3.0, -1.0], [3.0, -1.0, 0.5], [0.5, 1.0, 1.0], [1.0, 1.0, -1.0]]
ys = [1.0, -1.0, -1.0, 1.0]

# 3 inputs -> two layers of 4 neurons -> 1 output neuron.
n = MLP(3, [4, 4, 1])



In [837]:
# Training hyper-parameters, named so they can be tuned in one place.
EPOCHS = 80
LEARNING_RATE = 0.05  # NOTE(review): lower this if the printed loss grows instead of shrinking

for k in range(EPOCHS):

    # forward pass: one prediction per sample, then the summed squared error
    ypred = [n(x) for x in xs]
    loss = sum((yout - ygt) ** 2 for ygt, yout in zip(ys, ypred))

    # backward pass: clear the gradients left by the previous step first,
    # because backpropagate() accumulates into them
    n.zero_grad()
    loss.backpropagate()

    # update: plain gradient-descent step on every parameter
    for p in n.parameters():
        p.value -= LEARNING_RATE * p.grad

    print(k, loss.value)
    



0 8.716898246828779e+20
1 1.5539516537844654e+21
2 2.7702121487743304e+21
3 4.938426063981843e+21
4 8.803676642673592e+21
5 1.5694215408839125e+22
6 2.7977901426447888e+22
7 4.987589043713365e+22
8 8.891318934112022e+22
9 1.585045433679926e+23
10 2.8256426807396873e+23
11 5.037241450348267e+23
12 8.979833721390575e+23
13 1.6008248653287842e+24
14 2.8537724961994965e+24
15 5.087388156225356e+24
16 9.06922969037289e+24
17 1.616761384249637e+25
18 2.88218234937363e+25
19 5.1380340821885045e+25
20 9.159515613392016e+25
21 1.632856554276437e+26
22 2.9108750280913003e+26
23 5.1891841980695174e+26
24 9.250700350111284e+26
25 1.6491119548114102e+27
26 2.939853347935105e+27
27 5.240843523175851e+27
28 9.342792848393662e+27
29 1.665529180980031e+28
30 2.9691201525173e+28
31 5.293017126783114e+28
32 9.435802145179816e+28
33 1.682109843787557e+29
34 2.9986783137588523e+29
35 5.3457101286325364e+29
36 9.529737367374902e+29
37 1.6988555702771104e+30
38 3.028530732171252e+30
39 5.398927699433356e+30
