In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math

In [53]:
class Value:
  def __init__(self, data, _parents=()):
    self.data = data
    self._parents = set(_parents)
    self.grad = 0.0
    self._backward = lambda: None

  def verify_value(self, v):
    return v if isinstance(v, Value) else Value(v)

  def __add__(self, addend):
    addend = self.verify_value(addend)
    res = Value(self.data + addend.data, (self, addend))

    def _backward():
      self.grad += res.grad
      addend.grad += res.grad
    res._backward = _backward
    return res

  def __radd__(self, addend):
    return self + addend
  
  def __sub__(self, sh):
    return self + (-sh)
  
  def __mul__(self, multiplier):
    multiplier = self.verify_value(multiplier)
    res = Value(self.data * multiplier.data, (self, multiplier))
    def _backward():
      self.grad += multiplier.data * res.grad
      multiplier.grad += self.data * res.grad
    res._backward = _backward
    return res

  def __rmul__(self, multiplier):
    return self * multiplier

  def __truediv__(self, divisor):
    return self * divisor**-1
  
  def __neg__(self):
    return self * -1

  def __pow__(self, num):
    res = Value(self.data**num, (self,))
    def _backward():
      self.grad += (num*self.data**(num-1)) * res.grad
    res._backward = _backward
    return res

  def tanh(self):
    e = math.exp(2*self.data)
    e = (e - 1)/(e + 1); res = Value(e, (self,))
    def _backward():
      self.grad += (1 - e ** 2) * res.grad
    res._backward = _backward
    return res
  
  def exp(self):
    x = self.data
    out = Value(math.exp(x), (self,))

    def _backward():
      self.grad = out.data * out.grad
    out._backward = _backward
    return out
  
  def backward(self):
    back_path = []
    visited = set()

    def back_flood(node):
      if node not in visited:
        visited.add(node)
        for parent in node._parents:
          back_flood(parent)
        back_path.append(node)
    back_flood(self)
    self.grad = 1.0
    for node in reversed(back_path):
      node._backward()

  def __repr__(self):
    return f"Value(data={self.data} grad={self.grad})"

In [54]:
import random 

class Neuron:
  def __init__(self, num_inputs):
    self.weights = [Value(random.uniform(-1, 1)) for _ in range(num_inputs)]
    self.bias = Value(random.uniform(-1,1))

  def __call__(self, inputs):
    sum_wx = sum((weight*inp for weight, inp in zip(self.weights, inputs)), self.bias)
    return sum_wx.tanh()

  def parameters(self):
    return self.weights + [self.bias]



In [55]:
class Layer:
  def __init__(self, num_inputs, num_out):
    self.neurons = [Neuron(num_inputs) for _ in range(num_out)]

  def __call__(self, x):
    out = [neuron(x) for neuron in self.neurons]
    return out[0] if len(out) == 1 else out

  def parameters(self):
    return [p for neuron in self.neurons for p in neuron.parameters()]

    

In [56]:
class MLP:
  def __init__(self, num_inputs, layer_sizes):
    layer_sizes = [num_inputs] + layer_sizes
    self.layers = [Layer(num_in, num_out) for num_in, num_out in zip(layer_sizes, layer_sizes[1:])]

  def __call__(self, x):
    for layer in self.layers:
      x = layer(x)
    return x

  def parameters(self):
    return [p for layer in self.layers for p in layer.parameters()]



In [57]:
n = MLP(3, [4, 4, 1])

In [58]:
len(n.parameters())

41

In [59]:
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]
y_true = [1.0, -1.0, -1.0, 1.0]
y_preds = [n(x) for x in xs]
y_preds

[Value(data=-0.09194734629566204 grad=0.0),
 Value(data=-0.889818225790853 grad=0.0),
 Value(data=-0.7906767888422505 grad=0.0),
 Value(data=-0.47238741986496324 grad=0.0)]

In [None]:
y_preds = [n(x) for x in xs]
y_preds

[Value(data=-0.9984779212859008 grad=0.0),
 Value(data=0.3746992569264099 grad=0.0),
 Value(data=-0.9092366126582377 grad=0.0),
 Value(data=-0.9416439776253042 grad=0.0)]

In [60]:
mse_loss = sum((y_p - y_t)**2 for y_t, y_p in zip(y_true, y_preds))
mse_loss

Value(data=3.4162299513560095 grad=0.0)

In [61]:
mse_loss.backward()

In [62]:
for param in n.parameters():
  param.data += -0.03 * param.grad

In [63]:
for parent in mse_loss._parents:
  print(parent.data, parent.grad)
  for par in parent._parents:
    print(par.data, par.grad)
    for p in par._parents:
      print(p.data, p.grad)
      for x in p._parents:
        print(x.data, x.grad)
        for a in x._parents:
          print(a.data, a.grad)
          for b in a._parents:
            print(b.data, b.grad)
            for c in b._parents:
              print(c.data, x.grad)
              for d in c._parents:
                print(d.data, d.grad)



2.1679247141766034 1.0
-1.4723874198649631 -2.9447748397299263
-0.47238741986496324 -2.9447748397299263
-0.5131390964333955 -2.2876487039704596
0.27925237705474437 -2.2876487039704596
-0.31571092605053064 2.0234692110329715
-0.3268759878122762 -2.2876487039704596
-0.5343061230279714 1.821783177580836
0.2074301352156952 1.821783177580836
-0.9654044566921827 2.696175084415339
-0.7923914734881399 -2.2876487039704596
0.14956337916433338 -2.2876487039704596
-0.24246125475228897 -2.2876487039704596
-0.2473874446346443 1.3281892452985213
-0.6353969769103004 -2.2876487039704596
-0.9419548526524733 -2.2876487039704596
-0.8421162118688306 -2.2876487039704596
-0.3064208390394621 -2.2876487039704596
-0.40818715701614905 -4.25027386044065
-0.09983864078364266 -2.2876487039704596
0.0022745103487239043 -3.977040400018808
0.8530541221218084 0.2677388588508099
-1.0 -2.9447748397299263
1.2483052371794061 1.0
0.0438162067293918 1.0
0.20932321115774954 0.4186464223154991
1.0 0.4186464223154991
-0.79067678

In [64]:
for param in n.parameters():
  print(param.data, param.grad)

0.8011933283393744 -3.8790969141804634
-0.5158701216428029 -5.369917403505679
0.014680879125538851 2.4625667962682094
0.8701798012824639 -2.3151500291038327
0.27582818362848815 3.2760792990987655
-0.8194693425201133 3.709975974632634
0.4787286916789601 -2.9580097863057064
0.6985542712476596 2.730941897860051
-0.5889241864375832 0.10438916770078338
-0.3407862158624901 0.09618612713647205
0.06627844451289898 -0.13680897591742727
-0.7263534442871229 0.09619717576217567
-0.09050323642047987 1.1743714371591132
-0.4970034275090463 1.3079404370364311
-0.10638449529862615 -1.2086211173257808
-0.4518748366270408 1.083225142420872
-0.4467552667774188 0.2473214454441985
0.5166132593037579 -0.4003941991036089
-0.9736315933623217 -0.86981132749682
0.7050274266302025 -0.7402300778941174
0.7409659247316882 0.8936211028194228
-0.19428325356581885 0.03501792303936651
-0.972842230920233 -0.012989528035830212
-0.6666264276373559 -0.06254552666817673
-0.7131587068465065 -0.046779318811035246
0.34288954986

In [52]:
n.layers[0].neurons[0].weights[0].grad

-0.8316832167450405