# nn

> Neural-network building blocks (`Module`, `Neuron`, `Layer` and `MLP`) built on top of `neev.engine.Value`.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#|default_exp nn

In [None]:
#|export
import random
from neev.engine import Value

In [None]:
#|export
class Module:
    """Base class for objects that expose a list of trainable parameters."""

    def parameters(self):
        """Return the parameters owned by this module; the base class owns none."""
        return list()

    def zero_grad(self):
        """Set `grad` to 0 on every object returned by `parameters()`."""
        for param in self.parameters():
            param.grad = 0

class Neuron(Module):
    """A single unit: bias plus weighted sum of the inputs, optionally followed by ReLU."""
    def __init__(self, 
                 nin, # number of inputs to the neuron
                 nonlin=True # do we have a non-linearity at the end
                ):
        # one random weight per input, then a random bias, all drawn between -1 and 1
        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1, 1))
        self.nonlin = nonlin

    def __call__(self, x):
        """Evaluate the neuron on the input sequence `x`."""
        # start from the bias and add one weight*input product at a time;
        # zip() pairs weights with inputs and stops at the shorter of the two
        total = self.b
        for weight, xi in zip(self.w, x):
            total = total + weight * xi
        if self.nonlin:
            return total.relu()
        return total

    def parameters(self):
        """Return the weights followed by the bias, as a new list."""
        return [*self.w, self.b]

    def __repr__(self):
        return f"{'ReLU' if self.nonlin else 'Linear'}Neuron({len(self.w)})"
    
class Layer(Module):
    """A group of `Neuron`s that are all evaluated on the same input."""
    def __init__(self, 
                 nin,#number of inputs to each neuron in the layer 
                 nout,#number of neurons in the layer
                 **kwargs
                ):
        # any extra keyword arguments are forwarded unchanged to every Neuron
        self.neurons = [Neuron(nin, **kwargs) for _ in range(nout)]

    def __call__(self, x):
        """Evaluate every neuron on `x`; a one-neuron layer returns its output unwrapped."""
        activations = [neuron(x) for neuron in self.neurons]
        if len(activations) == 1:
            return activations[0]
        return activations

    def parameters(self):
        """Return the parameters of all neurons, in neuron order, as one flat list."""
        collected = []
        for neuron in self.neurons:
            collected.extend(neuron.parameters())
        return collected

    def __repr__(self):
        return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"

    
class MLP(Module):
    """Multi-layer perceptron: a stack of `Layer`s applied one after another.

    Every layer except the last uses the neurons' default ReLU non-linearity.
    The last layer is linear: a ReLU on the output would prevent the network
    from ever producing a negative value (e.g. a regression target of -1.0).
    """
    def __init__(self, 
                 nin,#number of inputs to the network (i.e. to each neuron of the first layer)
                 nouts # sequence with the number of neurons in each layer of the MLP
                ):
        # list() lets callers pass any sequence (e.g. a tuple), not only a list
        sz = [nin] + list(nouts)
        n_layers = len(sz) - 1
        # layer i maps sz[i] inputs to sz[i+1] outputs; only the final layer is linear
        self.layers = [Layer(sz[i], sz[i+1], nonlin=(i != n_layers - 1))
                       for i in range(n_layers)]

    def __call__(self, x):
        """Feed `x` through every layer in order and return the last layer's output."""
        for layer in self.layers:
            x = layer(x)
        return x
    
    def parameters(self):
        """Return the parameters of all layers, in layer order, as one flat list."""
        return [p for l in self.layers for p in l.parameters()]
    
    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

In [None]:
#|hide
# build a single 2-input neuron (ReLU is on by default) and evaluate it on one input
x = [2.0,3.0]
n = Neuron(2)
n(x)

Value(data=0.0)

In [None]:
#|hide
# we want 3 neurons in our layer, each neuron will take two inputs
l = Layer(2,3)
# calling the layer evaluates every neuron on x; with 3 neurons we get a list of 3 outputs
l(x)

[Value(data=0.14131627735144314),
 Value(data=2.015731845254659),
 Value(data=2.815106788968965)]

In [None]:
### |hide
# we want 3 layers in our MLP
# with 4 neurons in the first, 4 in the second
# 1 neuron in the output layer
# the MLP takes three inputs; neurons in later layers take one input per neuron of the previous layer
n = MLP(3,[4,4,1])
# the network expects 3 inputs: the 2-element x defined earlier would be silently
# truncated by zip() inside Neuron.__call__, leaving every third weight unused
x = [2.0, 3.0, -1.0]
n(x)

Value(data=0.0)

In [None]:
#|hide
# each neuron owns one weight per input plus one bias: (fan_in + 1) * number_of_neurons per layer
assert len(n.parameters()) == sum((fan_in + 1) * width for fan_in, width in [(3, 4), (4, 4), (4, 1)])

In [None]:
#|hide
from neev.viz import view_dot

In [None]:
#|hide
# view_dot(n(x))

In [None]:
#|hide
# four training examples with three features each
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]
# one target per row of xs
ys = [1.0, -1.0, -1.0, 1.0]

In [None]:
#|hide
# NOTE: this loop is intentionally left exactly as written -- the markdown cell that
# follows discusses a bug in it, so do not "fix" it here.
for k in range(10):
    # forward pass
    ypred =[n(x) for x in xs]
#     print(ypred)
    # loss: sum of squared differences between each prediction and its target
    loss = sum((yout-ygt)**2 for ygt,yout in zip(ys,ypred))
    
    # backward pass
    loss.backward()
#     print(n.layers[0].neurons[0].w[0].grad)
#     print(n.layers[0].neurons[0].w[0].data)

    # update: move every parameter against its gradient with a step size of 0.05
    for p in n.parameters():
        p.data += -0.05 *p.grad
        
    print(f'{k},{loss.data}')

ypred

0,4.0
1,4.0
2,4.0
3,4.0
4,4.0
5,4.0
6,4.0
7,4.0
8,4.0
9,4.0


[Value(data=0.0), Value(data=0.0), Value(data=0.0), Value(data=0.0)]

#|hide

The training loop above has a bug. Can you spot it?

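We forgot to zero out the gradients! They kept accumulating from one iteration to the next, which effectively gave us a much larger step size than intended. The gradients must be reset before every backward pass (for example with `zero_grad()`).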

In [None]:
#|hide
# start again from a freshly constructed network (new random weights and biases)
n = MLP(3,[4,4,1])

In [None]:
#|hide
for k in range(10):
    # forward pass
    ypred = [n(x) for x in xs]
    # loss: sum of squared differences between each prediction and its target
    loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))

    # backward pass: clear the gradients left over from the previous iteration first,
    # using the helper every Module provides instead of a hand-written loop
    n.zero_grad()
    loss.backward()

    # update: move every parameter against its gradient with a step size of 0.05
    # (no second reset needed here -- the gradients are cleared at the top of the next pass)
    for p in n.parameters():
        p.data += -0.05 * p.grad

    print(f'{k},{loss.data}')

ypred

0,3.7741061956415867
1,3.36266237838855
2,3.1721306456753666
3,2.9039711843015144
4,2.58831208728076
5,2.259529690772081
6,2.2625080262353623
7,2.395826249522481
8,2.7311816317761255
9,2.126828783173646


[Value(data=1.3541630250597492),
 Value(data=0.0),
 Value(data=0.0),
 Value(data=1.0373809423927969)]

In [None]:
#|hide
# view_dot(loss)

In [None]:
#| hide
# run nbdev's export step so that cells marked `#|export` are written out to the library module
import nbdev; nbdev.nbdev_export()