In [1]:
import math

In [2]:
class Value:
  """A scalar that remembers how it was computed so gradients can flow back.

  Every arithmetic operation returns a new ``Value`` whose ``_parents`` are the
  operands and whose ``_backward`` closure adds the result's gradient, scaled
  by the local derivative, into each operand's ``grad``. Calling ``backward()``
  on a result runs those closures from the result back towards the leaves.
  """

  def __init__(self, data, _parents=()):
    """Wrap ``data``; ``_parents`` are the operand Values that produced it."""
    self.data = data
    # Stored as a set, so an operand used twice (x + x) appears only once here.
    self._parents = set(_parents)
    self.grad = 0.0
    # Leaves have nothing to propagate; operations replace this with a closure.
    self._backward = lambda: None

  def verify_value(self, v):
    """Return ``v`` unchanged if it is a Value, otherwise wrap it in one."""
    return v if isinstance(v, Value) else Value(v)

  def __add__(self, addend):
    """Return ``self + addend``; ``addend`` may be a Value or a plain number."""
    addend = self.verify_value(addend)
    res = Value(self.data + addend.data, (self, addend))

    def _backward():
      # d(a + b)/da = d(a + b)/db = 1
      self.grad += res.grad
      addend.grad += res.grad
    res._backward = _backward
    return res

  def __radd__(self, addend):
    """Reflected addition (``number + Value``); also lets ``sum()`` start from 0."""
    return self + addend

  def __sub__(self, sh):
    """Return ``self - sh``, expressed as addition of the negated subtrahend."""
    return self + (-sh)

  def __rsub__(self, minuend):
    """Reflected subtraction (``number - Value``)."""
    return (-self) + minuend

  def __mul__(self, multiplier):
    """Return ``self * multiplier``; ``multiplier`` may be a Value or a number."""
    multiplier = self.verify_value(multiplier)
    res = Value(self.data * multiplier.data, (self, multiplier))

    def _backward():
      # d(a * b)/da = b and d(a * b)/db = a
      self.grad += multiplier.data * res.grad
      multiplier.grad += self.data * res.grad
    res._backward = _backward
    return res

  def __rmul__(self, multiplier):
    """Reflected multiplication (``number * Value``)."""
    return self * multiplier

  def __truediv__(self, divisor):
    """Return ``self / divisor``, computed as ``self * divisor**-1``."""
    return self * divisor**-1

  def __rtruediv__(self, dividend):
    """Reflected division (``number / Value``)."""
    return self**-1 * dividend

  def __neg__(self):
    """Return ``-self``, computed as ``self * -1``."""
    return self * -1

  def __pow__(self, num):
    """Return ``self ** num``.

    ``num`` is used as a plain number: it is not recorded as a parent and
    receives no gradient.
    """
    res = Value(self.data**num, (self,))

    def _backward():
      # Power rule: d(x**n)/dx = n * x**(n - 1)
      self.grad += (num*self.data**(num-1)) * res.grad
    res._backward = _backward
    return res

  def tanh(self):
    """Return the hyperbolic tangent of this value."""
    # math.tanh saturates to +/-1.0 for large inputs, whereas the explicit
    # (e**2x - 1) / (e**2x + 1) form raises OverflowError once e**2x overflows.
    t = math.tanh(self.data)
    res = Value(t, (self,))

    def _backward():
      # d tanh(x)/dx = 1 - tanh(x)**2
      self.grad += (1 - t ** 2) * res.grad
    res._backward = _backward
    return res

  def exp(self):
    """Return ``e ** self``."""
    out = Value(math.exp(self.data), (self,))

    def _backward():
      # d exp(x)/dx = exp(x). Accumulate with += so that gradient arriving
      # from other uses of this node is kept rather than overwritten.
      self.grad += out.data * out.grad
    out._backward = _backward
    return out

  def backward(self):
    """Back-propagate from this node, accumulating ``grad`` on every ancestor.

    Sets ``self.grad`` to 1.0 and then runs each reachable node's ``_backward``
    in reverse topological order. Gradients are accumulated, so any ``grad``
    left over from a previous pass must be reset by the caller first.
    """
    back_path = []
    visited = set()
    # Iterative post-order DFS: an explicit stack keeps long operation chains
    # from exceeding Python's recursion limit. The flag marks a node whose
    # parents have already been scheduled and which is ready to be emitted.
    stack = [(self, False)]
    while stack:
      node, parents_scheduled = stack.pop()
      if parents_scheduled:
        back_path.append(node)
        continue
      if node in visited:
        continue
      visited.add(node)
      stack.append((node, True))
      for parent in node._parents:
        if parent not in visited:
          stack.append((parent, False))

    self.grad = 1.0
    # Reversed post-order guarantees a node's gradient is complete before it
    # is pushed on to its parents.
    for node in reversed(back_path):
      node._backward()

  def __repr__(self):
    return f"Value(data={self.data} grad={self.grad})"

In [3]:
import random 

class Neuron:
  """A single tanh unit: ``tanh(bias + sum(weight_i * input_i))``."""

  def __init__(self, num_inputs):
    """Create ``num_inputs`` weights and one bias, each drawn uniformly from [-1, 1]."""
    self.weights = []
    for _ in range(num_inputs):
      self.weights.append(Value(random.uniform(-1, 1)))
    self.bias = Value(random.uniform(-1,1))

  def __call__(self, inputs):
    """Return the activation for ``inputs``, paired positionally with the weights."""
    # Start from the bias and add one weighted input at a time, left to right.
    pre_activation = self.bias
    for weight, inp in zip(self.weights, inputs):
      pre_activation = pre_activation + weight*inp
    return pre_activation.tanh()

  def parameters(self):
    """Return a new list holding every weight followed by the bias."""
    return [*self.weights, self.bias]



In [4]:
class Layer:
  """A group of neurons that all receive the same input."""

  def __init__(self, num_inputs, num_out):
    """Create ``num_out`` neurons, each accepting ``num_inputs`` inputs."""
    self.neurons = []
    for _ in range(num_out):
      self.neurons.append(Neuron(num_inputs))

  def __call__(self, x):
    """Feed ``x`` to every neuron.

    Returns the bare output when the layer has exactly one neuron, otherwise
    the list of outputs in neuron order.
    """
    activations = []
    for neuron in self.neurons:
      activations.append(neuron(x))
    if len(activations) == 1:
      return activations[0]
    return activations

  def parameters(self):
    """Return the parameters of all neurons as one flat list, in neuron order."""
    collected = []
    for neuron in self.neurons:
      for p in neuron.parameters():
        collected.append(p)
    return collected

    

In [5]:
class MLP:
  """A multi-layer perceptron: layers applied one after another."""

  def __init__(self, num_inputs, layer_sizes):
    """Build one layer per entry of ``layer_sizes`` (a list of output widths).

    Each layer takes as many inputs as the previous layer has outputs; the
    first layer takes ``num_inputs``.
    """
    widths = [num_inputs] + layer_sizes
    self.layers = []
    for idx in range(len(widths) - 1):
      self.layers.append(Layer(widths[idx], widths[idx + 1]))

  def __call__(self, x):
    """Pass ``x`` through every layer in order and return the final output."""
    signal = x
    for layer in self.layers:
      signal = layer(signal)
    return signal

  def parameters(self):
    """Return the parameters of all layers as one flat list, in layer order."""
    collected = []
    for layer in self.layers:
      collected.extend(layer.parameters())
    return collected



In [6]:
# 3 inputs -> 4 neurons -> 4 neurons -> 1 output neuron.
# Weights are drawn with `random` and no seed is set, so every run starts differently.
network = MLP(3, [4, 4, 1])

In [7]:
# Total trainable Values: (3*4 + 4) + (4*4 + 4) + (4*1 + 1) = 41.
len(network.parameters())

41

In [8]:
# Toy dataset: four samples with three features each.
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]
# One target per sample, in the same order as xs.
y_true = [1.0, -1.0, -1.0, 1.0]

In [9]:
# Check the initial predictions of the untrained network: one Value per sample in xs.
y_preds = [network(x) for x in xs]
y_preds

[Value(data=0.8305384198559888 grad=0.0),
 Value(data=0.5707631053827998 grad=0.0),
 Value(data=0.2877183100727975 grad=0.0),
 Value(data=0.6663887395777967 grad=0.0)]

In [28]:
# Hyperparameters, named so they can be tuned in one place.
NUM_EPOCHS = 50        # number of passes over the four samples
LEARNING_RATE = 0.07   # gradient-descent step size

for epoch in range(NUM_EPOCHS):
  # Forward pass: one prediction Value per sample.
  y_preds = [network(x) for x in xs]

  # Loss is the sum of squared errors over all samples. sum() starts from the
  # int 0, which Value.__radd__ absorbs.
  loss = sum((y_pred - y_t)**2 for y_pred, y_t in zip(y_preds, y_true))
  # From here on y_preds holds plain floats (for printing), not Values.
  y_preds = [y.data for y in y_preds]
  print("Predictions: ", y_preds)
  
  # Zero out gradients: backward() accumulates with +=, so values left over
  # from the previous epoch would otherwise be added to this epoch's gradients.
  for parameter in network.parameters():
    parameter.grad = 0.0
  
  # Back-propagate from the loss to fill in every parameter's grad.
  loss.backward()

  # Gradient-descent step: move each parameter against its gradient.
  for param in network.parameters():
    param.data += -LEARNING_RATE * param.grad


Predictions:  [0.9907081553264047, -0.9893815517297624, -0.9837681341516026, 0.9828995647347276]
Predictions:  [0.9907175801604258, -0.989392781888364, -0.9837857384619293, 0.9829178693448728]
Predictions:  [0.9907269764254779, -0.9894039772970273, -0.9838032879560799, 0.9829361173065815]
Predictions:  [0.9907363442653113, -0.9894151381329932, -0.983820782914426, 0.9829543089085763]
Predictions:  [0.9907456838226671, -0.9894262645722446, -0.9838382236153487, 0.9829724444375338]
Predictions:  [0.9907549952392863, -0.98943735678952, -0.9838556103352556, 0.9829905241781037]
Predictions:  [0.990764278655919, -0.9894484149583225, -0.9838729433485995, 0.9830085484129264]
Predictions:  [0.9907735342123324, -0.989459439250933, -0.9838902229278949, 0.9830265174226517]
Predictions:  [0.990782762047321, -0.9894704298384198, -0.9839074493437369, 0.9830444314859564]
Predictions:  [0.990791962298714, -0.9894813868906505, -0.9839246228648172, 0.9830622908795623]
Predictions:  [0.9908011351033847, -0.

In [25]:
# Show the loss Value left behind by the most recently executed training cell.
print(loss)

Value(data=0.001086478567210565 grad=1.0)
