# nn

> Neurons, layers and multi-layer perceptrons built from `neev.engine.Value` objects.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#|default_exp nn

In [None]:
#|export
import random
from neev.engine import Value

In [None]:
#|export
class Module:
    "Common base for network components: exposes the trainable parameters and a gradient reset."

    def parameters(self):
        "Return the list of trainable parameters; the base class owns none."
        return []

    def zero_grad(self):
        "Set `grad` to 0 on every parameter reported by `parameters()`."
        for param in self.parameters():
            param.grad = 0

class Neuron(Module):
    "One unit of the network: a weighted sum of its inputs plus a bias, optionally followed by ReLU."
    def __init__(self,
                 nin, # number of inputs to the neuron
                 nonlin=True # do we have a non-linearity at the end
                ):
        # one weight per input, then the bias, each drawn with random.uniform(-1, 1)
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1,1))
        self.nonlin = nonlin

    def __call__(self,x):
        "Return the weighted sum of `x` plus the bias, passed through `relu()` when `nonlin` is set."
        # the bias is the start value of the sum, so the products are added on top of it
        pre_activation = sum((weight*xi for weight,xi in zip(self.w, x)), self.b)
        if self.nonlin:
            return pre_activation.relu()
        return pre_activation

    def parameters(self):
        "Return the weights followed by the bias."
        return [*self.w, self.b]

    def __repr__(self):
        return f"{'ReLU' if self.nonlin else 'Linear'}Neuron({len(self.w)})"
    
class Layer(Module):
    "A group of `Neuron`s that all read the same input."
    def __init__(self,
                 nin,#number of inputs to each neuron in the layer
                 nout,#number of neurons in the layer
                 **kwargs
                ):
        # extra keyword arguments are handed to every Neuron unchanged
        neurons = []
        for _ in range(nout):
            neurons.append(Neuron(nin, **kwargs))
        self.neurons = neurons

    def __call__(self, x):
        "Evaluate every neuron on `x`; a one-neuron layer returns that single output unwrapped."
        activations = [neuron(x) for neuron in self.neurons]
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters(self):
        "Return the parameters of all neurons, concatenated in neuron order."
        params = []
        for neuron in self.neurons:
            params.extend(neuron.parameters())
        return params

    def __repr__(self):
        return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"

    
class MLP(Module):
    "A multi-layer perceptron: a stack of `Layer`s in which every layer but the last applies ReLU."
    def __init__(self,
                 nin,#number of inputs to the MLP, i.e. to each neuron of the first layer
                 nouts # sequence with the number of neurons in each layer of the MLP
                ):
        # copy into a list so that tuples (or any other sequence) work as well as lists
        nouts = list(nouts)
        sz = [nin] + nouts
        # layer i maps sz[i] inputs to sz[i+1] outputs; only the last layer is left linear
        self.layers = [Layer(sz[i], sz[i+1], nonlin=i!=len(nouts)-1) for i in range(len(nouts))]

    def __call__(self, x):
        "Feed `x` through every layer in order and return the output of the last one."
        last = len(self.layers) - 1
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # A one-neuron Layer returns its single output unwrapped. Re-wrap it when
            # another layer follows, because each neuron zips its weights with a sequence.
            if i != last and not isinstance(x, list):
                x = [x]
        return x

    def parameters(self):
        "Return the parameters of all layers, concatenated in layer order."
        return [p for l in self.layers for p in l.parameters()]

    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

In [None]:
#|hide
# a lone neuron with two inputs, evaluated on a two-element sample
x = [2.0, 3.0]
n = Neuron(nin=2)
n(x)

Value(data=0.5878484569575098, grad=0)

In [None]:
#|hide
# a layer of 3 neurons, every one of them reading the same two inputs
l = Layer(nin=2, nout=3)
l(x)

[Value(data=0.9133910750788039, grad=0),
 Value(data=0, grad=0),
 Value(data=0, grad=0)]

In [None]:
#|hide
# we want 3 layers in our MLP
# with 4 neurons in the first, 4 in the second
# 1 neuron in the output layer
# each neuron in the first layer will take three inputs
x = [2.0, 3.0, -1.0]  # three features: the earlier two-element x would be silently cut short by zip
n = MLP(3,[4,4,1])
n(x)

Value(data=0.3513214887908616, grad=0)

In [None]:
#|hide
# every neuron owns one weight per input plus one bias
assert len(n.parameters()) == 4*(3+1) + 4*(4+1) + 1*(4+1)

In [None]:
#|hide
# four training examples with three features each
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]
# target value for each row of xs
ys = [1.0, -1.0, -1.0, 1.0]

In [None]:
#|hide
# NOTE: the gradients are deliberately never reset in this loop; the discussion
# right after this cell is about exactly that.
for k in range(10):
    # forward pass: mean squared error over the training examples
    ypred = [n(x) for x in xs]
    loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred)) / len(ys)

    # backward pass
    loss.backward()

    # gradient-descent update with a step size of 0.05
    for p in n.parameters():
        p.data -= 0.05 * p.grad

    print(f'{k},{loss.data}')

ypred

0,2.7333176008876476
1,1.8921703655494224
2,1.2660917509071825
3,0.9472041421564896
4,0.9206885620508152
5,0.975091868857794
6,0.8926058860157713
7,0.5597824418937001
8,0.47472393385540357
9,0.7111006727436218


[Value(data=0.07924014239999504, grad=-0.4603799288000025),
 Value(data=-1.648413092033964, grad=-0.32420654601698196),
 Value(data=-1.7810042524501783, grad=-0.3905021262250892),
 Value(data=0.017046900742157672, grad=-0.4914765496289212)]

#|hide

The training loop above has a bug. Can you spot it?

We forgot to zero out the gradients before each backward pass! The gradients kept accumulating from one iteration to the next, which effectively gave us a much larger step size than intended.

In [None]:
#|hide
# build a new network so the corrected loop starts from freshly drawn weights
n = MLP(nin=3, nouts=[4, 4, 1])

In [None]:
#|hide
# same training loop, except the parameter gradients are cleared before each backward pass
for k in range(10):
    # forward pass: mean squared error over the training examples
    ypred = [n(x) for x in xs]
    loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred)) / len(ys)

    # backward pass, starting from zeroed parameter gradients
    n.zero_grad()
    loss.backward()

    # gradient-descent update with a step size of 0.05
    for p in n.parameters():
        p.data -= 0.05 * p.grad

    print(f'{k},{loss.data}')

ypred

0,2.8170895040170354
1,0.9969270672890835
2,0.7533475867144054
3,0.61082591721944
4,0.5156173863611934
5,0.44874168278198845
6,0.4006391992486378
7,0.3653970997569263
8,0.3429110035584173
9,0.32362653412514897


[Value(data=1.013470917055236, grad=0.006735458527618032),
 Value(data=-0.8237837837082684, grad=0.08810810814586578),
 Value(data=0.05145731432323194, grad=0.525728657161616),
 Value(data=0.6028727758434476, grad=-0.19856361207827622)]

In [None]:
#| hide
# Runs nbdev's export step.
# NOTE(review): presumably this writes the `#|export` cells to the module named by `#|default_exp` — confirm against the nbdev docs.
import nbdev; nbdev.nbdev_export()