In [1]:
class Variable:
    def __init__(self, value, name, no_grad: bool=False):
        self.value = value
        self.name = name
        self.no_grad = no_grad
        self.grad_fn = None
        self.grad = 0

    def __repr__(self):
        return "{0:.2f}".format(self.value)
    
    def backward(self, grad = 1):
        if self.no_grad is True:
            return
        
        if self.grad_fn is None:
            return
        
        self.grad_fn.backward(self.grad + grad)

In [2]:
class Operation:
    def __call__(self, *inputs):
        self.inputs = inputs
        o = self.forward()
        o.grad_fn = self
        return o
    
    def forward(self):
        raise NotImplementedError
    
    def backward(self, grad):
        raise NotImplementedError

In [3]:
class Power(Operation):
    def __init__(self, e: int):
        self.e = e

    def forward(self):
        x, = self.inputs
        name = f"({x.name}**{self.e})"
        return Variable(x.value ** self.e, name)
    
    def backward(self, grad):
        # print(f"Gradient propagated: {grad}")
        x, = self.inputs
        d = self.e - 1
        grad = grad * self.e * (x.value ** d)
        x.backward(grad)
        x.grad = grad

In [4]:
class Add(Operation):
    def forward(self):
        x, y = self.inputs
        name = f"({x.name}+{y.name})"
        return Variable(x.value + y.value, name)
    
    def backward(self, grad):
        # print(f"Gradient propagated: {grad}")
        x, y = self.inputs
        x.backward(grad)
        y.backward(grad)
        x.grad = grad
        y.grad = grad

In [5]:
class Subtract(Operation):
    def forward(self):
        x, y = self.inputs
        name = f"({x.name}-{y.name})"
        return Variable(x.value - y.value, name)

    def backward(self, grad):
        # print(f"Gradient propagated: {grad}")
        x, y = self.inputs
        x.backward(grad)
        y.backward(-grad)
        x.grad = grad
        y.grad = -grad

In [6]:
class Multiply(Operation):
    def forward(self):
        x, y = self.inputs
        name = f"({x.name}*{y.name})"
        return Variable(x.value * y.value, name)
    
    def backward(self, grad):
        # print(f"Gradient propagated: {grad}")
        x, y = self.inputs
        x.backward(grad * x.value)
        y.backward(grad * x.value)
        x.grad = grad * x.value
        y.grad = grad * y.value

### Example 1
- Learning rate: 0.01
- g(x) = 2 * x
- f(x, y) = y * g(x)
- z(x, y) = (49 - f(x, y)) ** 2

In [7]:
alpha = 0.0001
a = Variable(2, "a", no_grad = True)
b = Variable(2, "b", no_grad = True)
c = Variable(49, "c", no_grad = True)
x = Variable(2, "x")
y = Variable(3, "y")

In [8]:
for i in range(10):
    x.grad = 0
    y.grad = 0
    z = Multiply()(a, x)
    s = Multiply()(z, y)
    t = Subtract()(c, s)
    L = Power(2)(t)
    L.backward()
    print(f"Loss: {L}, input: {x, y}, grad: {x.grad, y.grad}")
    x_grad = x.grad
    y_grad = y.grad
    x.value = x.value - (alpha * x.grad)
    y.value = y.value - (alpha * y.grad)

Loss: 1369.00, input: (2.00, 3.00), grad: (-592, -222)
Loss: 1336.15, input: (2.06, 3.02), grad: (-619.9897234555995, -220.94319881548802)
Loss: 1302.12, input: (2.12, 3.04), grad: (-649.4537333273637, -219.70606370711485)
Loss: 1266.89, input: (2.19, 3.07), grad: (-680.435854221854, -218.2776158830505)
Loss: 1230.45, input: (2.25, 3.09), grad: (-712.9704947144901, -216.646485625168)
Loss: 1192.78, input: (2.33, 3.11), grad: (-747.0794686668054, -214.80096534387556)
Loss: 1153.88, input: (2.40, 3.13), grad: (-782.7681855251886, -212.72908261576163)
Loss: 1113.77, input: (2.48, 3.15), grad: (-820.0211374318666, -210.41869726491618)
Loss: 1072.46, input: (2.56, 3.17), grad: (-858.7966215627147, -207.8576270274884)
Loss: 1029.98, input: (2.65, 3.19), grad: (-899.0206571126828, -205.0338067561489)


In [9]:
L.name

'((c-((a*x)*y))**2)'

### Example 2
- Learning rate: 0.01
- $(49-(x+y))^2$

In [10]:
alpha = 0.01
a = Variable(49, "a", no_grad = True)
x = Variable(2, "x")
y = Variable(3, "y")

In [11]:
for i in range(10):
    x.grad = 0
    y.grad = 0
    z = Add()(x, y)
    t = Subtract()(a, z)
    L = Multiply()(t, t)
    L.backward()
    print(f"Loss: {L}, input: {x, y}, grad: {x.grad, y.grad}")
    x_grad = x.grad
    y_grad = y.grad
    x.value = x.value - (alpha * x.grad)
    y.value = y.value - (alpha * y.grad)

Loss: 1936.00, input: (2.00, 3.00), grad: (-88, -88)
Loss: 1784.22, input: (2.88, 3.88), grad: (-84.48, -84.48)
Loss: 1644.33, input: (3.72, 4.72), grad: (-81.10079999999999, -81.10079999999999)
Loss: 1515.42, input: (4.54, 5.54), grad: (-77.856768, -77.856768)
Loss: 1396.61, input: (5.31, 6.31), grad: (-74.74249728, -74.74249728)
Loss: 1287.12, input: (6.06, 7.06), grad: (-71.75279738879999, -71.75279738879999)
Loss: 1186.21, input: (6.78, 7.78), grad: (-68.882685493248, -68.882685493248)
Loss: 1093.21, input: (7.47, 8.47), grad: (-66.12737807351807, -66.12737807351807)
Loss: 1007.50, input: (8.13, 9.13), grad: (-63.48228295057736, -63.48228295057736)
Loss: 928.51, input: (8.76, 9.76), grad: (-60.942991632554254, -60.942991632554254)


In [12]:
L.name

'((a-(x+y))*(a-(x+y)))'