In [99]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
%matplotlib inline

In [165]:
class Value:
  def __init__(self,data,_children=(),_op='',label=''):
      self.data = data
      self._prev = set(_children)
      self._backward = lambda: None
      self._op = _op
      self._grad = 0
      self.label = label

  def __repr__(self):
      return f"Value(data={self.data},label={self.label})"

  def __add__(self,other):
      out = Value(self.data + other.data,(self,other),'+')
      def _backward():
          self._grad += 1*out._grad
          other._grad += 1*out._grad
      out._backward = _backward
      return out

  def __mul__(self,other):
      out = Value(self.data * other.data,(self,other),'*')
      def _backward():
          self._grad += other._grad*out._grad
          other._grad += self._grad*out._grad
      out._backward = _backward
      return out

  def tanh(self):
      x = self.data
      t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
      out = Value(t, (self, ), 'tanh')
        
      def _backward():
          self._grad += (1 - t**2) * out._grad
      out._backward = _backward   
      return out

  def backward(self):  
      topo = []
      visited = set()
      def build_topo(v):
        if v not in visited:
          visited.add(v)
          for child in v._prev:
            build_topo(child)
          topo.append(v)
      build_topo(self)
      print(topo)
        
      self._grad = 1.0
        
      for node in reversed(topo):
        node._backward()

class Value1:
  
  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self.grad = 0.0
    self._backward = lambda: None
    self._prev = set(_children)
    self._op = _op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data},label={self.label})"
  
  def __add__(self, other):
    out = Value1(self.data + other.data, (self, other), '+')
    
    def _backward():
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward
    
    return out

  def __mul__(self, other):
    out = Value1(self.data * other.data, (self, other), '*')
    
    def _backward():
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward
      
    return out
  
  def tanh(self):
    x = self.data
    t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
    out = Value1(t, (self, ), 'tanh')
    
    def _backward():
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward
    
    return out
  
  def backward(self):
    
    topo = []
    visited = set()
    def build_topo(v):
      if v not in visited:
        visited.add(v)
        for child in v._prev:
          build_topo(child)
        topo.append(v)
    build_topo(self)
    print(topo)
    
    self.grad = 1.0
    for node in reversed(topo):
      node._backward()

    



In [166]:
from graphviz import Digraph

def trace(root):
  # builds a set of all nodes and edges in a graph
  nodes, edges = set(), set()
  def build(v):
    if v not in nodes:
      nodes.add(v)
      for child in v._prev:
        edges.add((child, v))
        build(child)
  build(root)
  return nodes, edges

def draw_dot(root):
  dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) # LR = left to right
  
  nodes, edges = trace(root)
  for n in nodes:
    uid = str(id(n))
    # for any value in the graph, create a rectangular ('record') node for it
    dot.node(name = uid, label = "{ %s | data %.4f | grad %.4f }" % (n._label, n.data, n._grad), shape='record')
    if n._op:
      # if this value is a result of some operation, create an op node for it
      dot.node(name = uid + n._op, label = n._op)
      # and connect this node to it
      dot.edge(uid + n._op, uid)

  for n1, n2 in edges:
    # connect n1 to the op node of n2
    dot.edge(str(id(n1)), str(id(n2)) + n2._op)

  return dot

In [167]:


# inputs x1,x2
x1_ = Value(2.0, label='x1')
x2_ = Value(0.0, label='x2')
# weights w1,w2
w1_ = Value(-3.0, label='w1')
w2_ = Value(1.0, label='w2')
# bias of the neuron
b_ = Value(6.8813735870195432, label='b')
# x1*w1 + x2*w2 + b
x1_w1_ = x1_*w1_; x1_w1_.label = 'x1*w1'
x2_w2_ = x2_*w2_; x2_w2_.label = 'x2*w2'
x1w1x2w2_ = x1_w1_ + x2_w2_; x1w1x2w2_.label = 'x1*w1 + x2*w2'
n_ = x1w1x2w2_ + b_; n_.label = 'n'
o_ = n_.tanh(); o_.label = 'o'



In [168]:
o_.backward()

[Value(data=1.0,label=w2), Value(data=0.0,label=x2), Value(data=0.0,label=x2*w2), Value(data=-3.0,label=w1), Value(data=2.0,label=x1), Value(data=-6.0,label=x1*w1), Value(data=-6.0,label=x1*w1 + x2*w2), Value(data=6.881373587019543,label=b), Value(data=0.8813735870195432,label=n), Value(data=0.7071067811865476,label=o)]


In [169]:
x1_._grad

0.0

In [170]:
x1 = Value1(2.0, label='x1')
x2 = Value1(0.0, label='x2')
# weights w1,w2
w1 = Value1(-3.0, label='w1')
w2 = Value1(1.0, label='w2')
# bias of the neuron
b = Value1(6.8813735870195432, label='b')
# x1*w1 + x2*w2 + b
x1w1 = x1*w1; x1w1.label = 'x1*w1'
x2w2 = x2*w2; x2w2.label = 'x2*w2'
x1w1x2w2 = x1w1 + x2w2; x1w1x2w2.label = 'x1*w1 + x2*w2'
n = x1w1x2w2 + b; n.label = 'n'
o = n.tanh(); o.label = 'o'

In [171]:
o.backward()

[Value(data=6.881373587019543,label=b), Value(data=2.0,label=x1), Value(data=-3.0,label=w1), Value(data=-6.0,label=x1*w1), Value(data=1.0,label=w2), Value(data=0.0,label=x2), Value(data=0.0,label=x2*w2), Value(data=-6.0,label=x1*w1 + x2*w2), Value(data=0.8813735870195432,label=n), Value(data=0.7071067811865476,label=o)]


In [172]:
x1.grad

-1.4999999999999996