# nn

> `Neuron`, `Layer` and `MLP` building blocks on top of `neev.engine.Value`

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#|default_exp nn

In [None]:
#|export
import random
from neev.engine import Value

In [None]:
#|export
class Module:
    """Base class for objects that own trainable parameters."""

    def parameters(self):
        """Return the list of trainable parameters; the base class owns none."""
        return []

    def zero_grad(self):
        """Set the `grad` attribute of every parameter back to 0."""
        for param in self.parameters():
            param.grad = 0

class Neuron(Module):
    """A single unit: bias plus the weighted sum of its inputs, optionally passed through ReLU."""

    def __init__(self, 
                 nin, # number of inputs to the neuron
                 nonlin=True # do we have a non-linearity at the end
                ):
        self.nonlin = nonlin
        # one weight per input, drawn uniformly from [-1, 1]
        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
        self.b = Value(0.)

    def __call__(self, x):
        """Return the activation of the neuron for the inputs `x`."""
        # start from the bias and add the weighted inputs one at a time;
        # `zip` stops at the shorter of `self.w` and `x`
        total = self.b
        for weight, inp in zip(self.w, x):
            total = total + weight * inp
        if self.nonlin:
            return total.relu()
        return total

    def parameters(self):
        """Return the weights followed by the bias, as a new list."""
        return [*self.w, self.b]

    def __repr__(self):
        return f"{'ReLU' if self.nonlin else 'Linear'}Neuron({len(self.w)})"
    
class Layer(Module):
    """A group of neurons that all receive the same inputs."""

    def __init__(self, 
                 nin, # number of inputs to each neuron in the layer
                 nout, # number of neurons in the layer
                 **kwargs # forwarded unchanged to every `Neuron`
                ):
        self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]

    def __call__(self, x):
        """Apply every neuron to `x`; a single-neuron layer returns its output unwrapped."""
        results = [neuron(x) for neuron in self.neurons]
        if len(results) == 1:
            return results[0]
        return results

    def parameters(self):
        """Return the parameters of all neurons, in neuron order, as one flat list."""
        collected = []
        for neuron in self.neurons:
            collected.extend(neuron.parameters())
        return collected

    def __repr__(self):
        return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"

    
class MLP(Module):
    """Multi-layer perceptron: a chain of `Layer`s, each fed the previous layer's output.

    Hidden layers use the ReLU non-linearity; the final layer is linear so the
    network can also produce negative outputs (e.g. to fit a target of -1.0).
    """

    def __init__(self, 
                 nin, # number of inputs to the network (i.e. to each neuron of the first layer)
                 nouts # list with the number of neurons in each layer of the MLP
                ):
        nouts = list(nouts)  # accept any sequence (tuple, range, ...), not only a list
        sz = [nin] + nouts
        last = len(nouts) - 1
        # layer i maps sz[i] inputs to sz[i+1] outputs; only the last layer is linear
        self.layers = [Layer(sz[i], sz[i+1], nonlin=(i != last)) for i in range(len(nouts))]

    def __call__(self, x):
        """Run `x` through every layer in order and return the last layer's output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers, in layer order, as one flat list."""
        return [p for l in self.layers for p in l.parameters()]

    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

In [None]:
#|hide
# a single neuron with two inputs, called on a two-element sample
x = [2.0,3.0]
n = Neuron(2)
n(x)

Value(data=0)

In [None]:
#|hide
# we want 3 neurons in our layer, each neuron will take two inputs;
# with more than one neuron the call returns a list with one output per neuron
l = Layer(2,3)
l(x)

[Value(data=0.10968307409657507),
 Value(data=1.2650493394385491),
 Value(data=0)]

In [None]:
### |hide
# we want 3 layers in our MLP:
# 4 neurons in the first, 4 in the second
# and 1 neuron in the output layer.
# the network takes three inputs, so we call it with a 3-element sample
# (the 2-element `x` from above would be silently truncated by `zip` in `Neuron.__call__`)
n = MLP(3,[4,4,1])
n([2.0, 3.0, -1.0])

Value(data=0.9172487555710265)

In [None]:
#|hide
# each neuron has one weight per input plus one bias, so for MLP(3,[4,4,1]):
# 4 neurons * (3+1) + 4 neurons * (4+1) + 1 neuron * (4+1)
assert len(n.parameters()) == 4*4 + 4*5 + 5

In [None]:
#|hide
from neev.viz import view_dot

In [None]:
#|hide
# view_dot(n(x))

In [None]:
#|hide
# four training samples with three features each
xs = [
    [2.0,3.0,-1.0],
    [3.0,-1.0,0.5],
    [0.5,1.0,1.0],
    [1.0,1.0,-1.0]
]
# one target per sample in `xs`
ys= [1.0,-1.0,-1.0,1.0] #targets

In [None]:
#|hide
# ten update steps over the four samples in `xs`
for k in range(10):
    # forward pass: one prediction per sample
    ypred =[n(x) for x in xs]
#     print(ypred)
    # mean of the squared differences between predictions and targets
    loss = sum((yout-ygt)**2 for ygt,yout in zip(ys,ypred))/len(ys)
    
    # backward pass
    loss.backward()
#     print(n.layers[0].neurons[0].w[0].grad)
#     print(n.layers[0].neurons[0].w[0].data)

    # update: move every parameter against its gradient with step size 0.05
    for p in n.parameters():
        p.data += -0.05 *p.grad
        
    print(f'{k},{loss.data}')

# show the predictions from the last iteration
ypred

0,2.2507702656896567
1,1.0573550260509528
2,0.9285578490636487
3,1.0
4,1.0
5,1.0
6,1.0
7,1.0
8,1.0
9,1.0


[Value(data=0), Value(data=0), Value(data=0), Value(data=0)]

#|hide

The training loop above has a bug. Can you spot it?

We forgot to zero out the gradients before each backward pass! The gradients from earlier iterations kept accumulating, which effectively gave us a much larger step size than intended.

In [None]:
#|hide
# rebind `n` to a freshly initialised network so the next loop does not start from the parameters updated above
n = MLP(3,[4,4,1])

In [None]:
#|hide
# NOTE(review): the recorded output below lists steps 0-45, but this loop runs only 10 steps;
# the output looks stale — re-run the cell or adjust the step count so they agree.
for k in range(10):
    # forward pass: one prediction per sample
    ypred =[n(x) for x in xs]
    # mean of the squared differences between predictions and targets
    loss = sum((yout-ygt)**2 for ygt,yout in zip(ys,ypred))/len(ys)
    
    # backward pass: reset every parameter's grad to 0 first, then backpropagate
    n.zero_grad()
    loss.backward()

    # update: move every parameter against its gradient with step size 0.05
    for p in n.parameters():
        p.data += -0.05 * p.grad
        
    print(f'{k},{loss.data}')

# show the predictions from the last iteration
ypred

0,1.6644128972764667
1,0.7255956439240124
2,0.6455231759395288
3,0.5820697987413569
4,0.5697417379478957
5,0.5466548811467331
6,0.5453084951617699
7,0.5396681405739372
8,0.5361309242654972
9,0.5330331309996946
10,0.5301898481687445
11,0.5275682519116583
12,0.5265336352943062
13,0.5367907544633079
14,0.5270148361736797
15,0.5239376733113821
16,0.5217004750491708
17,0.5196930446614094
18,0.5178537781584924
19,0.5161674107820329
20,0.5146228677047072
21,0.5132099979015661
22,0.5119192855472541
23,0.5107417839323185
24,0.5096690725695857
25,0.5086932231376619
26,0.5078067719731929
27,0.5070026975213052
28,0.5062744013666103
29,0.5056156917099882
30,0.5050207683942108
31,0.5044842087944673
32,0.5040009540735206
33,0.5035662954559021
34,0.503175860302974
35,0.5028255978731135
36,0.5025117647311684
37,0.5022309098313266
38,0.5019798593403136
39,0.5017557012959952
40,0.5015557702125996
41,0.5013776317502308
42,0.5012190675653134
43,0.5010780604520058
44,0.5009527798741213
45,0.5008415679741094

[Value(data=1.0013313465105345),
 Value(data=0),
 Value(data=0),
 Value(data=0.9974133108335308)]

In [None]:
#|hide
# view_dot(loss)

In [None]:
#| hide
# run nbdev's export step (presumably writes the `#|export` cells to the module named by `#|default_exp` — confirm against the nbdev docs)
import nbdev; nbdev.nbdev_export()