reference: https://www.youtube.com/watch?v=VMj-3S1tku0

In [1]:
from chainRuleUtils import *

In [2]:
class Value:
    """Scalar node of a small expression graph.

    Attributes set by ``__init__``:
        data: the number held by the node.
        grad: gradient slot, initialised to 0.0.
        label: display name used by ``__repr__`` ("" when unnamed).
        _op: operator symbol that produced the node ("" for a leaf).
        children: labels of the two operands that produced the node
            (empty tuple for a leaf).
    """

    def __init__(self, data, _op="", label="", children=()):
        self._op = _op
        self.grad = 0.0
        self.data = data
        self.label = label
        self.children = children

    def __add__(self, other):
        """Return a new node holding ``self.data + other.data``.

        The result is left unlabelled (label "") instead of inheriting the
        right operand's label; callers name it afterwards if they want to.
        """
        total = self.data + other.data
        return Value(total, "+", children=(self.label, other.label))

    def __mul__(self, other):
        """Return a new node holding ``self.data * other.data``.

        The result is left unlabelled (label "") instead of inheriting the
        right operand's label; callers name it afterwards if they want to.
        """
        product = self.data * other.data
        return Value(product, "*", children=(self.label, other.label))

    def __repr__(self):
        """Return ``label : Value(data)``, plus `` <-- [lhs op rhs]`` when the
        node records two operands."""
        text = f"{self.label} : Value({self.data})"
        try:
            left, right = self.children[0], self.children[1]
        except (IndexError, KeyError, TypeError):
            # Leaf node (or children that cannot be indexed): nothing to append.
            return text
        return f"{text} <-- [{left}{self._op}{right}]"

# Leaf nodes (inputs) shared by the example cells that follow.
a, b, d = (
    Value(number, label=name)
    for name, number in (("a", 3.00), ("b", -5.00), ("d", 2.00))
)

In [3]:
# Single "+" node: z = a + b.
diagram = """
    a         b
    |         |
    ---> + <---
         |
         z
"""
print(diagram)

# Show the operands before any gradient helper is called.
for node in (a, b):
    print(node)

print("gradients:")
# NOTE(review): the recorded outputs show a and b shifting slightly from cell to
# cell, so the grad_of_* helpers presumably modify their arguments — confirm in
# chainRuleUtils. The helpers are therefore called one at a time, a before b.
for caption, grad_fn in (
    ("\tdz/da : ", grad_of_a_during_addition),
    ("\tdz/db : ", grad_of_b_during_addition),
):
    print(caption, grad_fn(a, b))


    a         b
    |         |
    ---> + <---
         |
         z

a : Value(3.0)
b : Value(-5.0)
gradients:
	dz/da :  0.9999999999998899
	dz/db :  1.000000000000334


In [4]:
# Single "*" node: z = a * b.
diagram = """
    a         b
    |         |
    ---> * <---
         |
         z
"""
print(diagram)

# Show the operands before any gradient helper is called.
for node in (a, b):
    print(node)

print("gradients:")
# NOTE(review): the recorded outputs show a and b shifting slightly from cell to
# cell, so the grad_of_* helpers presumably modify their arguments — confirm in
# chainRuleUtils. The helpers are therefore called one at a time, a before b.
for caption, grad_fn in (
    ("\tdz/da : ", grad_of_a_during_multiplication),
    ("\tdz/db : ", grad_of_b_during_multiplication),
):
    print(caption, grad_fn(a, b))


    a         b
    |         |
    ---> * <---
         |
         z

a : Value(3.001)
b : Value(-4.999)
gradients:
	dz/da :  -4.998999999999754
	dz/db :  3.0020000000003932


In [5]:
# Two-level graph: c = a + b, then z = c * d.
diagram = """
    a         b
    |         |
    ---> + <---
         c         d
         |         |
         ---> * <---
              |
              z
"""
print(diagram)

# Build the intermediate node and show every node of the graph.
c = a + b
c.label = "c"
for node in (a, b, c, d):
    print(node)

print("gradients:")
# Helpers are called one at a time in the listed order (c, d, a, b).
gradient_rows = (
    ("\tdz/dc : ", grad_of_c1),               # c is an intermediate node
    ("\tdz/dd : ", grad_of_d1),
    ("\tdz/da [dz/dc*dc/da]: ", grad_of_a1),  # a reaches z through c
    ("\tdz/db [dz/dc*dc/db]: ", grad_of_b1),  # b reaches z through c
)
for caption, grad_fn in gradient_rows:
    print(caption, grad_fn(a, b, d))


    a         b
    |         |
    ---> + <---
         c         d
         |         |
         ---> * <---
              |
              z

a : Value(3.002)
b : Value(-4.997999999999999)
c : Value(-1.9959999999999996) <-- [a+b]
d : Value(2.0)
gradients:
	dz/dc :  1.9999999999997797
	dz/dd :  -1.9959999999996647
	dz/da [dz/dc*dc/da]:  2.0009999999994754
	dz/db [dz/dc*dc/db]:  2.0010000000008077


In [6]:
# Two-level graph: c = a * b, then z = c + d.
diagram = """
    a         b
    |         |
    ---> * <---
         c         d
         |         |
         ---> + <---
              |
              z
"""
print(diagram)

# Build the intermediate node and show every node of the graph.
c = a * b
c.label = "c"
for node in (a, b, c, d):
    print(node)

print("gradients:")
# Helpers are called one at a time in the listed order (c, d, a, b).
gradient_rows = (
    ("\tdz/dc : ", grad_of_c2),               # c is an intermediate node
    ("\tdz/dd : ", grad_of_d2),
    ("\tdz/da [dz/dc*dc/da]: ", grad_of_a2),  # a reaches z through c
    ("\tdz/db [dz/dc*dc/db]: ", grad_of_b2),  # b reaches z through c
)
for caption, grad_fn in gradient_rows:
    print(caption, grad_fn(a, b, d))


    a         b
    |         |
    ---> * <---
         c         d
         |         |
         ---> + <---
              |
              z

a : Value(3.0029999999999997)
b : Value(-4.996999999999999)
c : Value(-15.005990999999995) <-- [a*b]
d : Value(2.001)
gradients:
	dz/dc :  0.9999999999994458
	dz/dd :  0.9999999999994458
	dz/da [dz/dc*dc/da]:  -4.996999999999474
	dz/db [dz/dc*dc/db]:  3.004000000000673


In [7]:
# Two-level graph: c = a * b, then z = c * d.
diagram = """
    a         b
    |         |
    ---> * <---
         c         d
         |         |
         ---> * <---
              |
              z
"""
print(diagram)

# Build the intermediate node and show every node of the graph.
c = a * b
c.label = "c"
for node in (a, b, c, d):
    print(node)

print("gradients:")
# Helpers are called one at a time in the listed order (c, d, a, b).
gradient_rows = (
    ("\tdz/dc : ", grad_of_c3),               # c is an intermediate node
    ("\tdz/dd : ", grad_of_d3),
    ("\tdz/da [dz/dc*dc/da]: ", grad_of_a3),  # a reaches z through c
    ("\tdz/db [dz/dc*dc/db]: ", grad_of_b3),  # b reaches z through c
)
for caption, grad_fn in gradient_rows:
    print(caption, grad_fn(a, b, d))


    a         b
    |         |
    ---> * <---
         c         d
         |         |
         ---> * <---
              |
              z

a : Value(3.0039999999999996)
b : Value(-4.995999999999999)
c : Value(-15.007983999999993) <-- [a*b]
d : Value(2.002)
gradients:
	dz/dc :  2.0020000000009475
	dz/dd :  -15.00798399999681
	dz/da [dz/dc*dc/da]:  -10.00698800000066
	dz/db [dz/dc*dc/db]:  6.019015000003236


In [8]:
# Two-level graph: c = a + b, then z = c + d.
diagram = """
    a         b
    |         |
    ---> + <---
         c         d
         |         |
         ---> + <---
              |
              z
"""
print(diagram)

# Build the intermediate node and show every node of the graph.
c = a + b
c.label = "c"
for node in (a, b, c, d):
    print(node)

print("gradients:")
# Helpers are called one at a time in the listed order (c, d, a, b).
gradient_rows = (
    ("\tdz/dc : ", grad_of_c4),               # c is an intermediate node
    ("\tdz/dd : ", grad_of_d4),
    ("\tdz/da [dz/dc*dc/da]: ", grad_of_a4),  # a reaches z through c
    ("\tdz/db [dz/dc*dc/db]: ", grad_of_b4),  # b reaches z through c
)
for caption, grad_fn in gradient_rows:
    print(caption, grad_fn(a, b, d))


    a         b
    |         |
    ---> + <---
         c         d
         |         |
         ---> + <---
              |
              z

a : Value(3.0049999999999994)
b : Value(-4.994999999999998)
c : Value(-1.9899999999999989) <-- [a+b]
d : Value(2.0029999999999997)
gradients:
	dz/dc :  0.9999999999998899
	dz/dd :  0.9999999999998899
	dz/da [dz/dc*dc/da]:  0.9999999999998899
	dz/db [dz/dc*dc/db]:  1.000000000000334


In [9]:
# b feeds two nodes: c = a + b, then z = c + b.
diagram = """
    a         b -----
    |         |     |
    ---> + <---     |
         c          |
         |          |
         ---> + <---
              |
              z
"""
print(diagram)

# Build the intermediate node and show every node of the graph.
c = a + b
c.label = "c"
for node in (a, b, c):
    print(node)

print("gradients:")
# Helpers are called one at a time in the listed order (c, a, b).
gradient_rows = (
    ("\tdz/dc : ", grad_of_c5),               # c is an intermediate node
    ("\tdz/da [dz/dc*dc/da]: ", grad_of_a5),  # a reaches z through c
    # b has two paths to z; the gradients of both paths are accumulated
    ("\tdz/db [dz/dc*dc/db + dz/db]: ", grad_of_b5),
)
for caption, grad_fn in gradient_rows:
    print(caption, grad_fn(a, b))


    a         b -----
    |         |     |
    ---> + <---     |
         c          |
         |          |
         ---> + <---
              |
              z

a : Value(3.0059999999999993)
b : Value(-4.993999999999998)
c : Value(-1.9879999999999987) <-- [a+b]
gradients:
	dz/dc :  1.000000000000334
	dz/da [dz/dc*dc/da]:  1.000000000000334
	dz/db [dz/dc*dc/db + dz/db]:  2.000000000000668


In [10]:
# b feeds two nodes: c = a + b, then z = c * b.
diagram = """
    a         b -----
    |         |     |
    ---> + <---     |
         c          |
         |          |
         ---> * <---
              |
              z
"""
print(diagram)

# Build the intermediate node and show every node of the graph.
c = a + b
c.label = "c"
for node in (a, b, c):
    print(node)

print("gradients:")
# Helpers are called one at a time in the listed order (c, a, b).
gradient_rows = (
    ("\tdz/dc : ", grad_of_c6),               # c is an intermediate node
    ("\tdz/da [dz/dc*dc/da]: ", grad_of_a6),  # a reaches z through c
    # b has two paths to z; the gradients of both paths are accumulated
    ("\tdz/db [dz/dc*dc/db + dz/db]: ", grad_of_b6),
)
for caption, grad_fn in gradient_rows:
    print(caption, grad_fn(a, b))


    a         b -----
    |         |     |
    ---> + <---     |
         c          |
         |          |
         ---> * <---
              |
              z

a : Value(3.0069999999999992)
b : Value(-4.992999999999998)
c : Value(-1.9859999999999984) <-- [a+b]
gradients:
	dz/dc :  -4.992999999998915
	dz/da [dz/dc*dc/da]:  -4.992999999998915
	dz/db [dz/dc*dc/db + dz/db]:  -6.9770000000026755


In [11]:
print("""Now let's see backpropogation in action. Let's nudge the final input by
tweaking the leaf-nodes in a postive direction. This should
cause the final output to increase as well.
    a         b
    |         |
    ---> * <---
         c         d
         |         |
         ---> * <---
              |
              z
"""
)
# Forward pass: c = a * b, z = c * d.
c = a * b; c.label = "c"
for x in [a, b, c, d]: print(x)
z = c * d; z.label = "z"
print("initial :", z)

# Store dz/d(node) on each node, using the helpers for the (a * b) * d graph.
a.grad = grad_of_a3(a, b, d)
b.grad = grad_of_b3(a, b, d)
c.grad = grad_of_c3(a, b, d)
d.grad = grad_of_d3(a, b, d)

# Fraction of the gradient added to each leaf.
step_size = 0.01

print("lets increase it by a grad of 0.01; z should now be less negative")
# Only the leaves are nudged. c is an intermediate node that is rebuilt from
# a and b just below, so changing c.data here would have no effect on z.
a.data += step_size * a.grad
b.data += step_size * b.grad
d.data += step_size * d.grad

# Forward pass again with the nudged leaves.
c = a * b; c.label = "c"
z = c * d; z.label = "z"  # name the new output so it is not printed under another node's label
print("final :", z)
print("\nPS: in neural network the actual data are never changed, its their weights that are updated")

Now let's see backpropogation in action. Let's nudge the final input by
tweaking the leaf-nodes in a postive direction. This should
cause the final output to increase as well.
    a         b
    |         |
    ---> * <---
         c         d
         |         |
         ---> * <---
              |
              z

a : Value(3.007999999999999)
b : Value(-4.991999999999997)
c : Value(-15.015935999999988) <-- [a*b]
d : Value(2.0039999999999996)
initial : z : Value(-30.09193574399997) <-- [c*d]
lets increase it by a grad of 0.01; z should now be less negative
final : d : Value(-26.604083653091294) <-- [c*d]

PS: in neural network the actual data are never changed, its their weights that are updated
