In [369]:
from nn import Neuron, Layer, MLP
from value import Value

In [370]:
from graphviz import Digraph

def trace(root):
    # builds set of all nodes and edges in a graph
    nodes, edges = set(), set()
    def build(v):
        if v not in nodes:
            nodes.add(v)
            for child in v._prev:
                edges.add((child, v))
                build(child)
    build(root)
    return nodes, edges

def draw_graph(root):
    # builds a digraph based on the does and edges produced by trace, adds 'ghost' nodes for operations
    graph = Digraph(format='svg', graph_attr={'rankdir': 'LR'})

    nodes, edges = trace(root)

    for n in nodes:
        uid = str(id(n))
        # for all values in the graph create a rectangular record node for it
        graph.node(name = uid, label = "{ %s | data %.2f | grad %.2f }" % (n.label, n.data, n.grad), shape='record')
        if n._op:
            # if this value is the result of an operation, add a node for it
            graph.node(name= uid + n._op, label = n._op)
            # and connect this node to it
            graph.edge(uid + n._op, uid)
        
    for n1, n2 in edges:
        # connect n1 to the op node of n2
        graph.edge(str(id(n1)), str(id(n2)) + n2._op)
        
    return graph

### Implementing a single neuron manually:
```tanh( (x1 * w1) + (x2 * w2) + b )```

```x1, x2```: Inputs

```w1, w2```: Weights

```b```: Bias

```tanh()```: Activation function $$tanh(x) = {e^{2x} - 1 \over e^{2x} + 1}$$



In [371]:
x1 = Value(2.0, label='x1')
x2 = Value(0.0, label='x2')

w1 = Value(-3.0, label='w1')
w2 = Value(1.0, label='w2')

b = Value(6.8813735870195432, label='b')

x1w1 = x1*w1; x1w1.label='x1w1'  # x1 * w1
x2w2 = x2*w2; x2w2.label='x2w2'  # x2 * w2

x1w1x2w2 = x1w1+x2w2; x1w1x2w2.label='x1w1x2w2' #  x1*w1 + x2*w2

n = x1w1x2w2 + b; n.label='n'
# -----
e = (2 * n).exp(); e.label='e'
o = (e - 1)/(e + 1); o.label='out'
# -----
o.backward()

### Single Neuron in Pytorch
The exact same implementation as above, but this time using Pytorch

Note: the ```.double()``` is used to match the default dtype from our own implementation

In [372]:
import torch

x1 = torch.Tensor([2.0]).double();                  x1.requires_grad = True
x2 = torch.Tensor([0.0]).double();                  x2.requires_grad = True
w1 = torch.Tensor([-3.0]).double();                 w1.requires_grad = True
w2 = torch.Tensor([1.0]).double();                  w2.requires_grad = True
b = torch.Tensor([6.8813735870195432]).double();    b.requires_grad = True
n = x1*w1 + x2*w2 + b
o = torch.tanh(n)

print(o.data.item())
o.backward()

print('---')
print('x2', x2.grad.item())
print('w2', w2.grad.item())
print('x1', x1.grad.item())
print('w1', w1.grad.item())

0.7071066904050358
---
x2 0.5000001283844369
w2 0.0
x1 -1.5000003851533106
w1 1.0000002567688737


### Building a Multi-Layer Perceptron from scratch

In [373]:
nin = 3 # size of input layers
nouts = [4,4,1] # size out all following layers
n = MLP(nin,nouts)

xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0] #deisred targets

### Optimizing the MLP 
This is done using gradient descent with the root mean squared error as the loss function

In [374]:
# --- params:---
error = .0001      # target error
max_iter = 10000    # max iterations
h = 0.1            # step size
# --------------

loss = 1.0
nn = 0
while loss > error and max_iter > nn:
    # nudge parameters until within the allowed error
    ypred = [n(x) for x in xs] # calculate outputs
    loss = sum([(yout-ytrue)**2 for yout, ytrue in zip(ys, ypred)])/len(ys) # calculate error

    for p in n.parameters():
        # reset gradients to 0 between iterations
        p.grad = 0.0
    
    loss.backward() # backpropagate gradients

    for p in n.parameters():
        # nudge parameters in opposite direction of gradient
        p.data += -h * p.grad
    
    nn += 1

print(f'loss:{loss.data} achieved in {nn} iteration')
print(f'predicted values:{[y.data for y in ypred]}')

loss:9.997190507132244e-05 achieved in 2964 iteration
predicted values:[0.9932045828630143, -0.9950121091188823, -0.9866973629839282, 0.9876764161383002]
