# Autodiff

In [None]:
# A computational graph is a DAG (directed acyclic graph).
# Sort its nodes topologically,
# then walk them in reverse order to run backpropagation.

In [None]:
class Variable:
  """A scalar node in a computational graph for reverse-mode autodiff.

  Attributes:
    value: the number held by this node.
    gradient: accumulated derivative with respect to this node; starts at 0.
    fanout: how many operand slots of other nodes this node fills. The
      operator overloads below increment it, and the ``backward`` methods
      of the operation nodes decrement it.
  """

  def __init__(self, value):
    self.value, self.gradient, self.fanout = value, 0, 0

  def __str__(self):
    return f"{self.value!s}"

  def __add__(self, other):
    """Build the node for ``self + other`` and count one more use of each operand."""
    for operand in (self, other):
      operand.fanout += 1
    return Addition(self, other)

  def __mul__(self, other):
    """Build the node for ``self * other`` and count one more use of each operand."""
    for operand in (self, other):
      operand.fanout += 1
    return Multiplication(self, other)

  def __pow__(self, c):
    """Build the node for ``self ** c``; only ``self`` gets its fanout bumped."""
    self.fanout += 1
    return Power(self, c)

  def backward(self):
    """Do nothing: a plain Variable has no operands to propagate to."""
    return None

class Addition(Variable):
  """Graph node for ``v1 + v2``.

  Args:
    v1: left operand (a Variable).
    v2: right operand (a Variable); may be the same object as ``v1``.
  """

  def __init__(self, v1, v2):
    super().__init__(v1.value+v2.value)
    self.v1 = v1
    self.v2 = v2

  def backward(self):
    """Pass this node's gradient to both operands and recurse when they are ready.

    d(v1 + v2)/dv1 = d(v1 + v2)/dv2 = 1, so each operand receives
    ``self.gradient`` unchanged. Gradients are accumulated (+=) so that an
    operand used several times sums all of its contributions.
    """
    self.v1.gradient += self.gradient
    self.v2.gradient += self.gradient

    self.v1.fanout -= 1
    self.v2.fanout -= 1

    # Decide who is ready *before* recursing: an operand whose fanout just
    # reached 0 here has received all its gradients and must be propagated
    # exactly once. Checking v2 after v1.backward() could see a 0 that a
    # nested call already handled, and v1 being v2 (x + x) would otherwise
    # trigger backward twice on the same node.
    v1_ready = self.v1.fanout == 0
    v2_ready = self.v2 is not self.v1 and self.v2.fanout == 0

    if v1_ready:
      self.v1.backward()
    if v2_ready:
      self.v2.backward()

class Multiplication(Variable):
  """Graph node for ``v1 * v2``.

  Args:
    v1: left operand (a Variable).
    v2: right operand (a Variable); may be the same object as ``v1``.
  """

  def __init__(self, v1, v2):
    super().__init__(v1.value*v2.value)
    self.v1 = v1
    self.v2 = v2

  def backward(self):
    """Apply the product rule to both operands and recurse when they are ready.

    d(v1 * v2)/dv1 = v2 and d(v1 * v2)/dv2 = v1, each scaled by
    ``self.gradient``. Gradients are accumulated (+=), so ``x * x``
    correctly yields ``2 * x`` on ``x``.
    """
    self.v1.gradient += self.v2.value * self.gradient
    self.v2.gradient += self.v1.value * self.gradient

    self.v1.fanout -= 1
    self.v2.fanout -= 1

    # Decide who is ready *before* recursing: an operand whose fanout just
    # reached 0 here has received all its gradients and must be propagated
    # exactly once. Checking v2 after v1.backward() could see a 0 that a
    # nested call already handled, and v1 being v2 (x * x) would otherwise
    # trigger backward twice on the same node.
    v1_ready = self.v1.fanout == 0
    v2_ready = self.v2 is not self.v1 and self.v2.fanout == 0

    if v1_ready:
      self.v1.backward()
    if v2_ready:
      self.v2.backward()

class Power(Variable):
  """Graph node for ``v1 ** c`` with a constant exponent ``c``.

  Args:
    v1: the base (a Variable).
    c: the exponent, a plain number; no gradient is computed for it.
  """

  def __init__(self, v1, c):
    super().__init__(v1.value**c)
    self.v1 = v1
    self.c = c

  def backward(self):
    """Apply the power rule to the base and recurse when it is ready.

    d(v1 ** c)/dv1 = c * v1 ** (c - 1), scaled by ``self.gradient`` and
    accumulated into ``v1.gradient``.

    Raises:
      ZeroDivisionError: when ``v1.value`` is 0 and ``c - 1`` is negative,
        because Python refuses to raise zero to a negative power.
    """
    self.v1.gradient += self.gradient * self.c * self.v1.value**(self.c - 1)

    self.v1.fanout -= 1

    # Power has a single operand; there is no v2 to visit.
    if self.v1.fanout == 0:
      self.v1.backward()

In [None]:
# Addition check: f = x + y, so df/dx = df/dy = 1.
x = Variable(1)
y = Variable(2)
f = x + y  # `x + y` dispatches to x.__add__(y)
f.gradient = 1  # seed the output gradient (df/df = 1)
f.backward()
print(x,y,f)
print(x.gradient, y.gradient)

1 2 3
1 1


In [None]:
# Shared-operand check: f = x + x, so df/dx = 2. y is created and printed but not used in f.
x = Variable(1)
y = Variable(2)
f = x + x  # `x + x` dispatches to x.__add__(x)
f.gradient = 1  # seed the output gradient (df/df = 1)
f.backward()
print(x,y,f)
# Expect 2: x fills both operand slots, so the gradient must be accumulated (+=);
# assigning it instead would leave x.gradient at 1.
print(x.gradient)

1 2 2
2


In [None]:
# Product-rule check: f = x * y, so df/dx = y = 4 and df/dy = x = 3.
x = Variable(3)
y = Variable(4)

f = x * y
f.gradient = 1  # seed the output gradient (df/df = 1)
f.backward()

print(x,y,f)
print(x.gradient, y.gradient)

3 4 12
4 3


In [None]:
# Shared-subexpression check: f = x*(x*y) + y*(x*y) = x^2*y + x*y^2.
x = Variable(3)
y = Variable(4)

xy = x * y  # used twice below, so its fanout is 2
first = x*xy
second = y*xy
f = first+second

f.gradient = 1  # seed the output gradient (df/df = 1)

# The operation nodes call backward() on an operand once its fanout drops to 0,
# so first, second and xy do not need explicit backward() calls here.
f.backward()

print(x,y,f)
# Analytically df/dx = 2xy + y^2 = 40 and df/dy = x^2 + 2xy = 33.
# NOTE(review): the recorded output below shows 12 12, presumably from an
# earlier run before xy was propagated automatically; re-run to confirm.
print(x.gradient, y.gradient)

3 4 84
12 12


In [None]:
# Same function built differently: f = (x*x)*y + (y*y)*x = x^2*y + x*y^2.
x = Variable(3)
y = Variable(4)

x2 = x*x  # x fills both operand slots of this node
y2 = y*y
x2y = x2*y
y2x = y2*x
f=x2y+y2x

f.gradient = 1  # seed the output gradient (df/df = 1)
# The operation nodes call backward() on an operand once its fanout drops to 0,
# so y2x, x2y, x2 and y2 do not need explicit backward() calls here.
f.backward()

print(x,y,f)
# Expect df/dx = 2xy + y^2 = 40 and df/dy = x^2 + 2xy = 33.
print(x.gradient, y.gradient)

3 4 84
40 33


In [None]:
# Power check: f = (x ** 0.5) ** 2 = x for x > 0, so df/dx should be 1 (up to rounding).
x = Variable(2)

x_sqrt = x ** 0.5
f = x_sqrt ** 2

f.gradient = 1  # seed the output gradient (df/df = 1)
f.backward()
# NOTE(review): Power.backward already calls v1.backward() once v1's fanout
# reaches 0, so this explicit call may propagate x_sqrt's gradient a second
# time; confirm whether it is still needed.
x_sqrt.backward()

x.gradient

1.0000000000000002

In [None]:
# Negative base: (-2) ** 0.5 is a complex number in Python 3, so the node
# values and the resulting gradient are complex as well.
x = Variable(-2)

x_sqrt = x ** 0.5
f = x_sqrt ** 2

f.gradient = 1  # seed the output gradient (df/df = 1)
f.backward()
# NOTE(review): Power.backward already calls v1.backward() once v1's fanout
# reaches 0, so this explicit call may propagate x_sqrt's gradient a second
# time; confirm whether it is still needed.
x_sqrt.backward()

x.gradient

(1.0000000000000002+0j)

In [None]:
# Zero base: the derivative of x ** 0.5 needs 0 ** (0.5 - 1), which Python
# rejects with ZeroDivisionError (see the recorded output below).
x = Variable(0)

x_sqrt = x ** 0.5
f = x_sqrt ** 2

f.gradient = 1  # seed the output gradient (df/df = 1)
f.backward()
# NOTE(review): Power.backward already calls v1.backward() once v1's fanout
# reaches 0, so this explicit call may propagate x_sqrt's gradient a second
# time; confirm whether it is still needed.
x_sqrt.backward()

x.gradient

ZeroDivisionError: 0.0 cannot be raised to a negative power

In [None]:
# Same graph as the x = 2 power check, but only f.backward() is called.
x = Variable(2)

x_sqrt = x ** 0.5
f = x_sqrt ** 2

f.gradient = 1  # seed the output gradient (df/df = 1)
f.backward()

# NOTE(review): the recorded result below is 0, which suggests it was produced
# before Power.backward propagated to its operand; re-run to confirm.
x.gradient

0