In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt
from graphviz import Digraph
%matplotlib inline

# What is a derivative?

In [None]:
# Let's define a simple quadratic function

def f(x):
    """Evaluate the quadratic 3x^2 - 4x + 5 at ``x``.

    Works for anything that supports ``*``, ``**`` and ``-`` with numbers;
    this notebook calls it with plain floats and with NumPy arrays.
    """
    quadratic_term = 3 * x**2
    linear_term = 4 * x
    return quadratic_term - linear_term + 5

In [None]:
# Sample f on [-5, 5) in steps of 0.25 and plot the resulting parabola.
xs = np.arange(-5, 5, 0.25)

# Explicit figure/axes so the plot carries its own title and axis labels
# and still makes sense when the notebook is only skimmed.
fig, ax = plt.subplots()
ax.plot(xs, f(xs))
ax.set(xlabel="x", ylabel="f(x)", title=r"$f(x) = 3x^2 - 4x + 5$");  # ';' hides the list-of-artists repr

Intuitively a derivative tells you how `f(x)` is going to change, proportionally, when you change `x`.

More formally:

When you increase `x` by a really small value `h`, how much does the value of `f(x)` change in response?

Formal definition:

$$f'(x) = \lim_{h \to 0} \frac{f(x + h) - f(x)}{h}$$

In [None]:
# Forward-difference estimate of f'(x) at x = 3.
# Differentiating by hand gives f'(x) = 6x - 4, so the estimate should land near 14.
# TODO: turn this into an ipywidget that takes h and x and reports f(x) and f'(x).

h = 1e-8  # step size for the difference quotient
x = 3.0
slope = (f(x + h) - f(x)) / h

print(f"x={x}\nh={h}\nslope={slope}\ntheoretical slope:{6*x -4}")

In [None]:
# Forward-difference estimate of f'(x) at x = -3.
# Differentiating by hand gives f'(x) = 6x - 4, so the estimate should land near -22.
# TODO: turn this into an ipywidget that takes h and x and reports f(x) and f'(x).

h = 1e-8  # step size for the difference quotient
x = -3.0
slope = (f(x + h) - f(x)) / h

print(f"x={x}\nh={h}\nslope={slope}\ntheoretical slope:{6*x -4}")

In [None]:
# Forward-difference estimate of f'(x) at x = 2/3, the bottom of the parabola.
# Differentiating by hand gives f'(x) = 6x - 4, which is 0 here, so the
# estimate should be close to zero.
# TODO: turn this into an ipywidget that takes h and x and reports f(x) and f'(x).

h = 1e-8  # step size for the difference quotient
x = 2/3
slope = (f(x + h) - f(x)) / h

print(f"x={x}\nh={h}\nslope={slope}\ntheoretical slope:{6*x -4}")

# Partial derivatives

In [None]:
# Partial derivative of d = a*b + c with respect to `a`:
# bump `a` by h, hold `b` and `c` fixed, and compare d before and after.
# Analytically the slope is `b`.
# NOTE: `h` is not set in this cell; it is whatever an earlier cell left behind.
a, b, c = 2.0, -3.0, 10.0

d1 = a*b + c
a = a + h
d2 = a*b + c

print(f"a={a}\nb={b}\nc={c}\nh={h}\nd1={d1}\nd2={d2}\nslope={(d2-d1)/h}\ntheoretical slope:{b}")

In [None]:
# Partial derivative of d = a*b + c with respect to `b`:
# bump `b` by h, hold `a` and `c` fixed, and compare d before and after.
# NOTE: `h` is not set in this cell; it is whatever an earlier cell left behind.
a = 2.0
b = -3.0
c = 10.0

d1 = a*b + c
b += h
d2 = a*b + c

# d(a*b + c)/db = a, so the reference value printed last is `a`, not `b`.
print(f"a={a}\nb={b}\nc={c}\nh={h}\nd1={d1}\nd2={d2}\nslope={(d2-d1)/h}\ntheoretical slope:{a}")

In [None]:
# Partial derivative of d = a*b + c with respect to `c`:
# bump `c` by h, hold `a` and `b` fixed, and compare d before and after.
# NOTE: `h` is not set in this cell; it is whatever an earlier cell left behind.
a = 2.0
b = -3.0
c = 10.0

d1 = a*b + c
c += h
d2 = a*b + c

# d(a*b + c)/dc = 1 regardless of a and b, so the reference value is 1.0, not `b`.
print(f"a={a}\nb={b}\nc={c}\nh={h}\nd1={d1}\nd2={d2}\nslope={(d2-d1)/h}\ntheoretical slope:{1.0}")

#### Neural networks need to keep track of how varying `a`, `b` and `c` affects `d`, i.e. `f(a, b, c)`
So let's write a few data structures to store this.

# Understanding forward and back propagation

In [None]:
# Defining a Value class to do math functions and store representations.
class Value:
    """A scalar that remembers how it was computed.

    Every ``Value`` stores its number in ``data`` together with the operands
    (``_prev``) and the operator symbol (``_op``) that produced it, so an
    arithmetic expression built from ``Value`` objects forms a graph that
    can be walked afterwards.
    """

    def __init__(self, data, _children=(), _op="", label=""):
        """
        param data: Stores the value
        param _children: Stores the values that gave rise to self.data
        param _op: Math operation between children that produced self.data
        param label: Optional display name for this node
        """
        self.data = data
        # A set, so an operand used twice (e.g. ``a + a``) is stored once.
        self._prev = set(_children)
        # Gradient slot; starts at 0.0 and is filled in later by the notebook.
        self.grad = 0.0
        self._op = _op
        self.label = label

    def __repr__(self):
        return f"Value=({self.data})"

    @staticmethod
    def _wrap(other):
        """Return ``other`` unchanged if it is a Value, else wrap it in one."""
        return other if isinstance(other, Value) else Value(other)

    def __add__(self, other):
        """Return a new "+" node; ``other`` may be a Value or a plain number."""
        other = Value._wrap(other)
        return Value(self.data + other.data, (self, other), "+")

    def __radd__(self, other):
        """Support ``number + Value`` (and therefore the built-in ``sum``)."""
        return self + other

    def __mul__(self, other):
        """Return a new "*" node; ``other`` may be a Value or a plain number."""
        other = Value._wrap(other)
        return Value(self.data * other.data, (self, other), "*")

    def __rmul__(self, other):
        """Support ``number * Value``."""
        return self * other

In [None]:
# The a*b + c expression from the partial-derivative cells, rebuilt out of
# Value nodes and extended with one more factor: L = (a*b + c) * f.
h = 0.001

# Leaf nodes.
a = Value(2.0, label="a")
b = Value(-3.0, label="b")
c = Value(10.0, label="c")
f = Value(-2.0, label="f")  # NOTE: rebinds `f`, which earlier named the quadratic function

# Intermediate results, labelled after the fact because the operators do not set labels.
e = a * b
e.label = "e"
d = e + c
d.label = "d"
L = d * f
L.label = "L"

In [None]:
# Manually updating 

In [None]:
def trace(root):
  """Collect every node and edge reachable from ``root`` through ``_prev``.

  Returns ``(nodes, edges)``: ``nodes`` is the set of visited objects and
  ``edges`` is a set of ``(child, parent)`` pairs, one per ``_prev`` link.

  The walk uses an explicit stack instead of recursion, so long chains of
  operations do not run into Python's recursion limit.
  """
  nodes, edges = set(), set()
  pending = [root]
  while pending:
    v = pending.pop()
    if v in nodes:
      continue  # already expanded when reached along another path
    nodes.add(v)
    for child in v._prev:
      edges.add((child, v))
      pending.append(child)
  return nodes, edges

def draw_dot(root):
  """Render the expression graph ending at ``root`` as a graphviz ``Digraph``.

  Each value becomes a record-shaped node showing its label, data and grad.
  A value produced by an operation also gets a small node carrying the
  operator symbol, placed between the value and its operands. The graph is
  laid out left to right and returned without being rendered.
  """
  graph = Digraph(format='svg', graph_attr={'rankdir': 'LR'})  # LR = left to right

  values, links = trace(root)
  for value in values:
    value_id = str(id(value))
    record = "{ %s | data %.4f | grad %.4f }" % (value.label, value.data, value.grad)
    graph.node(name=value_id, label=record, shape='record')
    if not value._op:
      continue  # leaf value: no operator node needed
    # Operator node id = value id + operator symbol; it feeds into the value.
    op_id = value_id + value._op
    graph.node(name=op_id, label=value._op)
    graph.edge(op_id, value_id)

  for source, target in links:
    # Operands point at the operator node of the value they produced.
    graph.edge(str(id(source)), str(id(target)) + target._op)

  return graph

In [None]:
def lol():
    """Print a finite-difference estimate of dL/db for L = (a*b + c) * f.

    The expression is evaluated twice from freshly built Value nodes, once
    with b = -3.0 and once with b = -3.0 + h, and the difference quotient of
    the two results is printed. Everything lives inside the function so the
    notebook's global a, b, c, ... are left untouched.
    """
    h = 0.0001

    def forward(b_data):
        # Build the whole graph from scratch and return the final number.
        a = Value(2.0, label="a")
        b = Value(b_data, label="b")
        c = Value(10.0, label="c")
        e = a * b
        e.label = "e"
        d = e + c
        d.label = "d"
        f = Value(-2.0, label="f")
        L = d * f
        L.label = "L"
        return L.data

    L1 = forward(-3.0)
    L2 = forward(-3.0 + h)
    print((L2 - L1)/h)

lol()

L = d * f; d = e + c; e = a * b

How does nudging each of these variables by a small amount change the value of `L`?

In [None]:
# Gradients of L filled in by hand, working backwards from the output.
# Graph: e = a*b, d = e + c, L = d*f, with a=2, b=-3, c=10, f=-2 (so d=4).
L.grad = 1.0   # dL/dL
f.grad = 4.0   # dL/df = d
# dL/dd = f = -2; the "+" node passes that on unchanged to both c and e.
d.grad = c.grad = e.grad = -2.0
b.grad = -4.0  # dL/db = dL/de * a = -2 * 2
a.grad = 6.0   # dL/da = dL/de * b = -2 * -3
draw_dot(L)

In [None]:
# Move every leaf a small step along its own gradient; since the gradient
# points in the direction of increase, L is expected to go up.

step = 0.01
a.data, b.data, c.data, f.data = (
    leaf.data + step * leaf.grad for leaf in (a, b, c, f)
)

# Re-run the forward pass with the nudged leaves.
e = a * b
d = e + c
L = d * f

print(L.data)