In [13]:
# pip install graphviz
from graphviz import Digraph
import math
import random


In [14]:
class Value:
    """A scalar node in a dynamically built computation graph with reverse-mode autodiff.

    Every arithmetic operation on a ``Value`` returns a new ``Value`` that
    remembers its operands (``_prev``), the operation symbol (``_op``) and a
    closure (``_backward``) that pushes the node's gradient to its operands.

    Attributes:
        data: the wrapped number.
        grad: accumulated derivative of the backprop root w.r.t. this node.
        label: optional display name; overwritten by ``backward()``/``visit()``.
    """

    def __init__(self, data, _children=(), _op="", label=''):
        self.data = data
        self.grad = 0

        self._prev = set(_children)
        self._backward = lambda: None  # leaves have nothing to propagate
        self._op = _op
        self.label = label

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped into ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(other.data + self.data, (self, other), '+')

        def _backward():
            self.grad += out.grad
            other.grad += out.grad

        out._backward = _backward
        return out

    def __radd__(self, other):
        """Reflected add, so ``number + Value`` (and ``sum`` starting at 0) works."""
        return self + other

    def __sub__(self, other):
        """Return ``self - other``; plain numbers are wrapped into ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data - other.data, (self, other), '-')

        def _backward():
            self.grad += out.grad
            other.grad -= out.grad

        out._backward = _backward
        return out

    def __rsub__(self, other):
        """Reflected subtract, so ``number - Value`` works."""
        return Value(other) - self

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped into ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            self.grad += out.grad * other.data
            other.grad += out.grad * self.data

        out._backward = _backward
        return out

    def __rmul__(self, other):
        """Reflected multiply, so ``number * Value`` works.

        Must dispatch to ``self * other``: evaluating ``other * self`` here
        would re-enter ``__rmul__`` and recurse without end.
        """
        return self * other

    def __neg__(self):
        """Return ``-self`` expressed as a multiplication by -1."""
        return self * -1

    def __pow__(self, other):
        """Return ``self ** other``; the exponent is wrapped into ``Value`` if needed."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data ** other.data, (self, other), '**')

        def _backward():
            # d(x**y)/dx = y * x**(y-1)
            self.grad += out.grad * (other.data * self.data ** (other.data - 1))
            # d(x**y)/dy = x**y * ln(x); only defined for a positive base,
            # so the exponent receives no gradient otherwise.
            if self.data > 0:
                other.grad += out.grad * (out.data * math.log(self.data))

        out._backward = _backward
        return out

    def tanh(self):
        """Return ``tanh(self)`` as a new node labelled ``tanh(<label>)``."""
        v = math.tanh(self.data)
        out = Value(v, (self, ), 'tanh', label=f'tanh({self.label})')

        def _backward():
            self.grad += out.grad * (1 - v**2)

        out._backward = _backward
        return out

    def relu(self):
        """Return ``max(self, 0)`` as a new node."""
        out = Value(self.data if self.data > 0 else 0, (self, ), 'relu')

        def _backward():
            # gradient passes through only where the input was positive
            if self.data > 0:
                self.grad += out.grad

        out._backward = _backward
        return out

    def exp(self):
        """Return ``e ** self`` as a new node labelled ``exp(<label>)``."""
        out = Value(math.exp(self.data), (self,), 'exp', label=f'exp({self.label})')

        def _backward():
            self.grad += out.grad * out.data

        out._backward = _backward
        return out

    def __repr__(self):
        """Render as a graphviz record label: ``{label|v:data|g:grad}``."""
        if self.label:
            return "{%s|v:%.4f|g:%.4f}" % (self.label, self.data, self.grad)
        return "{v:%.4f|g:%.4f}" % (self.data, self.grad)

    def backward(self):
        """Backpropagate from this node through the whole graph.

        Nodes are ordered by a depth-first post-order walk, so that when the
        list is reversed every node is processed only after all nodes that
        consume it. This node's ``grad`` is set to 1.0 first. Gradients are
        accumulated, so callers must reset ``grad`` between passes.

        NOTE: every reachable node's ``label`` is replaced by its 1-based
        position in the processing order, so the order shows up when drawn.
        """
        topo = []
        visited = set()

        # a node is appended only after all of its dependencies were appended
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)

        build_topo(self)
        self.grad = 1.0
        for idx, n in enumerate(reversed(topo)):
            n.label = str(idx + 1)

        for n in reversed(topo):
            n._backward()

    def visit(self):
        """Backpropagate in breadth-first order, printing child grads per step.

        Counterpart of ``backward()`` that processes nodes in BFS discovery
        order instead of reverse topological order. BFS order does not
        guarantee that a node is handled after all of its consumers, so for
        graphs where a node is shared at different depths the resulting
        gradients can be incomplete; use ``backward()`` for correct results.

        NOTE: like ``backward()``, this overwrites every reachable node's
        ``label`` with its 1-based position in the processing order.
        """
        # `order` doubles as the BFS queue: `head` is the next node to expand
        order, seen = [self], {self}
        head = 0
        while head < len(order):
            node = order[head]
            head += 1
            for child in node._prev:
                if child not in seen:
                    seen.add(child)
                    order.append(child)
        self.grad = 1.0

        for idx, n in enumerate(order):
            n.label = str(idx + 1)

        for n in order:
            strs = [f'{c.label}: {c.grad}' for c in n._prev]
            # outer double quotes keep this valid on Python < 3.12
            print(f"{n.label}, beofore, {', '.join(strs)}")
            n._backward()
        



In [None]:
class Nueron:
    """A single tanh unit: ``tanh(b + sum(w_i * x_i))``."""

    def __init__(self, nin):
        """Create ``nin`` weights drawn uniformly from [-1, 1] and a zero bias."""
        weights = []
        for _ in range(nin):
            weights.append(Value(random.uniform(-1, 1)))
        self.w = weights
        self.b = Value(0)

    def __call__(self, x):
        """Return the activation for input sequence ``x`` (paired with the weights by ``zip``)."""
        # start from the bias and fold in one weighted input at a time
        total = self.b
        for weight, feature in zip(self.w, x):
            total = total + weight * feature
        return total.tanh()

    def parameters(self):
        """Return a new list holding the weights followed by the bias."""
        return [*self.w, self.b]
    
class Layer:
    """A group of ``nout`` neurons that all receive the same ``nin`` inputs."""

    def __init__(self, nin, nout):
        """Build ``nout`` independent neurons, each taking ``nin`` inputs."""
        units = []
        for _ in range(nout):
            units.append(Nueron(nin))
        self.nuerons = units

    def __call__(self, x):
        """Evaluate every neuron on ``x``.

        Returns the bare output when the layer has exactly one neuron,
        otherwise the list of outputs.
        """
        outs = []
        for unit in self.nuerons:
            outs.append(unit(x))
        if len(outs) == 1:
            return outs[0]
        return outs

    def parameters(self):
        """Return the parameters of all neurons as one flat list."""
        collected = []
        for unit in self.nuerons:
            collected.extend(unit.parameters())
        return collected


class MLP:
    """A stack of fully connected layers applied one after another."""

    def __init__(self, nin, nouts):
        """Build the layers.

        ``nin`` is the input width and ``nouts`` is a list with the width of
        each successive layer; consecutive sizes become (inputs, outputs)
        of one ``Layer``.
        """
        sz = [nin] + nouts
        self.layers = [Layer(fan_in, fan_out) for fan_in, fan_out in zip(sz, sz[1:])]

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        out = x
        for stage in self.layers:
            out = stage(out)
        return out

    def parameters(self):
        """Return the parameters of all layers as one flat list."""
        collected = []
        for stage in self.layers:
            collected.extend(stage.parameters())
        return collected

# Quick construction check: one 10-input neuron, ten Value inputs holding 0..9,
# and a layer of 20 neurons with 10 inputs each.
# Note: `n` and `x` are reassigned by the next cell.
n = Nueron(10)
x = [Value(i) for i in range(10)]
l = Layer(10, 20)


In [None]:
# A single 3-feature input and a 3 -> 4 -> 4 -> 1 network.
x = [2.0, 3.0, -1.0]
n = MLP(3, [4, 4, 1])
# Forward pass on the single input; the result is not stored.
n(x)

# Training inputs: four samples with three features each.
xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
]
ys = [1.0, -1.0, -1.0, 1.0] # desired targets, one per row of xs


In [None]:
# Network output for every training sample.
ypred = [n(x) for x in xs]
# Difference between the first prediction and 1 (displayed as the cell output).
ypred[0] - 1

In [None]:
# Gradient descent: 20 steps with a fixed step size of 0.1.
for k in range(20):

    # forward pass: predictions and summed squared error over all samples
    ypred = list(map(n, xs))
    loss = sum((pred - target)**2 for target, pred in zip(ys, ypred))

    # backward pass: clear stale gradients first, because backward() accumulates
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # update: move each parameter against its gradient
    for p in n.parameters():
        p.data -= 0.1 * p.grad

    print(k, loss.data)

In [None]:
# Render the computation graph of the last loss.
# NOTE(review): draw_dot is defined in a later cell of this notebook, so this
# cell fails on a fresh kernel unless that cell has been run first.
draw_dot(loss)

In [None]:
def trace(root):
    """Collect every node and edge reachable from ``root`` via ``_prev``.

    Args:
        root: a graph node exposing an iterable ``_prev`` of operand nodes.

    Returns:
        ``(nodes, edges)``: ``nodes`` is the set of reachable nodes including
        ``root``; ``edges`` is a set of ``(node, operand)`` pairs.
    """
    nodes, edges = set(), set()

    def build(n):
        # Skip nodes already expanded: a node shared by several consumers
        # would otherwise be re-walked once per path leading to it.
        if n in nodes:
            return
        nodes.add(n)
        for p in n._prev:
            edges.add((n, p))
            build(p)

    build(root)
    return nodes, edges

def draw_dot(root):
    """Build a left-to-right graphviz drawing of the graph ending at ``root``.

    Each value becomes a record-shaped node whose text is ``str(node)``.
    Each value produced by an operation also gets a small operation node
    that feeds it, and operand values are wired into that operation node.

    Args:
        root: the output node whose computation graph is drawn.

    Returns:
        The populated ``Digraph`` (SVG format).
    """
    dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'})  # LR = left to right

    nodes, edges = trace(root)
    for n in nodes:
        uid = str(id(n))
        # for any value in the graph, create a rectangular ('record') node for it
        dot.node(name=uid, label=str(n), shape='record')
        if n._op:
            # if this value is a result of some operation, create an op node for it
            dot.node(name=uid + n._op, label=n._op)
            # and connect the op node to the value it produced
            dot.edge(uid + n._op, uid)

    for n, p in edges:
        # connect operand p to the op node of its consumer n
        dot.edge(str(id(p)), str(id(n)) + n._op)

    return dot

In [None]:
# Expression in which `c` is consumed three times (by d, by the sum, by tanh).
a, b = Value(3, label='a'), Value(2, label='b')

c = a + b + a.exp()
d = c - 10
e = d + c + c.tanh()
# Backpropagate in reverse topological order; this also relabels the nodes
# with their processing order, which is what the drawing then shows.
e.backward()
draw_dot(e)

In [None]:
# Same expression as the previous cell, rebuilt so gradients start from zero.
a, b = Value(3, label='a'), Value(2, label='b')

c = a + b + a.exp()
d = c - 10
e = d + c + c.tanh()
# Propagate in breadth-first order instead; prints the child gradients seen
# before each node is processed and relabels nodes with the BFS order.
e.visit()
draw_dot(e)

In [None]:
# Expression using a power: t = tanh(a ** b) + a, so `a` is consumed twice.
a, b = Value(0.33, label='a'), Value(2, label='b')

c = a ** b
t = c.tanh() + a

# Backpropagate in reverse topological order and draw the resulting graph.
t.backward()
draw_dot(t)

In [None]:
# Same power expression as the previous cell, rebuilt so gradients start from zero.
a, b = Value(0.33, label='a'), Value(2, label='b')

c = a ** b
t = c.tanh() + a

# Propagate in breadth-first order (with per-node debug prints) and draw the graph.
t.visit()
draw_dot(t)