In [1]:
import json_tricks
import numpy
import numpy as np


inputs1 = json_tricks.load(open('inputs1.json'))
inputs2 = json_tricks.load(open('inputs2.json'))

answer = {}

# Implementing Backpropagation package for Numpy

In this project, we will implement the backpropagation algorithm for numpy package.
Your task will be to implement all the methods of the Node class that will be a wrapper for numpy arrays
supporting backpropagation.

Step-by-step, we will implement the following methods:

1. `__init__` (a constructor)
3. `backward` (a recursive mechanism that triggers backpropagation)
4. `__neg__` (negation operator $- x$)
5. `__add__` (addition operator $x + y$)
6. `__mul__` (product operator $x \cdot y$)
7. `__sub__` (substitution operator $x - y$)
8. `__truediv__` (division operator $x / y$)
9. `exp` (exponentiation $\exp(x)$)
10. `sum` (summation $\sum_{k} x_k$)
11. `matmul` (matrix product $XY$)

After that we will use the implemented methods on:
1. Simple graph from the previous task
2. Two-layer neural network

Let's start!

`Node([1]) + Node([2]) -> Node`
`(x + y).backward()`

In [5]:
import numpy
import copy

# Node(np.array([1]))
class Node:
    def __init__(self, data):
        self.data = np.array(data, dtype=np.float64)
        self.grad = np.zeros_like(self.data)
        self.parents = []
        self.backward_step = None
        
    def update_grad(self, grad_output):
        self.grad = self.grad + grad_output

    def backward(self, grad_output=None):
        if grad_output is None:
            grad_output = np.ones_like(self.data)

        self.update_grad(grad_output)
        if self.backward_step:
            self.backward_step(grad_output)

    def backward_step(self, grad_output=None):
        if self.backward_step:
            self.backward_step(grad_output)


    # @backprop
    # def backward(self, grad_output=None):


    def __str__(self):
        return f"Node(data={self.data}, grad={self.grad})"

    def __neg__(self):
        return TensorNeg()(self)

    def __add__(self, other):
        return TensorSum()(self, other)

    def __mul__(self, other):
        return TensorMul()(self, other)

    def __sub__(self, other):
        return TensorSub()(self, other)

    def __truediv__(self, other):
        return TensorDiv()(self, other)

    def exp(self):
        return TensorExp()(self)

    def sum(self, axis=None):
        return TensorSumReduce()(self)

    def __matmul__(self, other):
        return TensorMatMul()(self, other)


class TensorSum(Node):
    def __init__(self):
        self.input1 = None
        self.input2 = None

    def __call__(self, input1, input2):
        self.input1 = input1
        self.input2 = input2
        output = Node(input1.data + input2.data)
        # print(f"TensorSum: {input1.data} + {input2.data} = {output.data}")

        def backward_step(grad_output):
            self.input1.backward(grad_output)
            self.input2.backward(grad_output)

        output.backward_step = backward_step
        return output

    def backward_step(self, grad_output=None):
        pass



class TensorSub(Node):
    def __init__(self):
        self.input1 = None
        self.input2 = None

    def __call__(self, input1, input2):
        self.input1 = input1
        self.input2 = input2
        output = Node(input1.data - input2.data)
        # print(f"TensorSub: {input1.data} - {input2.data} = {output.data}")

        def backward_step(grad_output):
            self.input1.backward(grad_output)
            self.input2.backward(-grad_output)

        output.backward_step = backward_step
        return output

    def backward_step(self, grad_output=None):
        pass



class TensorMul(Node):
    def __init__(self):
        self.input1 = None
        self.input2 = None

    def __call__(self, input1, input2):
        self.input1 = input1
        self.input2 = input2
        output = Node(input1.data * input2.data)
        # print(f"TensorMul: {input1.data} * {input2.data} = {output.data}")

        def backward_step(grad_output):
            self.input1.backward(grad_output * self.input2.data)
            self.input2.backward(grad_output * self.input1.data)

        output.backward_step = backward_step
        return output

    def backward_step(self, grad_output=None):
        pass



class TensorDiv(Node):
    def __init__(self):
        self.input1 = None
        self.input2 = None

    def __call__(self, input1, input2):
        self.input1 = input1
        self.input2 = input2
        output = Node(input1.data / input2.data)
        # print(f"TensorDiv: {input1.data} / {input2.data} = {output.data}")

        def backward_step(grad_output):
            self.input1.backward(grad_output / self.input2.data)
            self.input2.backward(-grad_output * self.input1.data / (self.input2.data ** 2))

        output.backward_step = backward_step
        return output

    def backward_step(self, grad_output=None):
        pass


class TensorNeg(Node):
    def __init__(self):
        self.input1 = None

    def __call__(self, input1):
        self.input1 = input1
        output = Node(-input1.data)
        # print(f"TensorNeg: -{input1.data} = {output.data}")

        def backward_step(grad_output):
            self.input1.backward(-grad_output)

        output.backward_step = backward_step
        return output

    def backward_step(self, grad_output=None):
        pass



class TensorExp(Node):
    def __init__(self):
        self.input1 = None
        self.exp_output = None

    def __call__(self, input1):
        self.input1 = input1
        self.exp_output = np.exp(input1.data)
        output = Node(self.exp_output)
        # print(f"TensorExp: exp({input1.data}) = {output.data}")

        def backward_step(grad_output):
            self.input1.backward(grad_output * self.exp_output)

        output.backward_step = backward_step
        return output

    def backward_step(self, grad_output=None):
        pass



class TensorSumReduce(Node):
    def __init__(self):
        self.input1 = None

    def __call__(self, input1):
        self.input1 = input1
        output = Node(np.array([input1.data.sum()]))
        # print(f"TensorSumReduce: sum({input1.data}) = {output.data}")

        def backward_step(grad_output):
            self.input1.backward(grad_output * np.ones_like(self.input1.data))

        output.backward_step = backward_step
        return output

    def backward_step(self, grad_output=None):
        pass



class TensorMatMul(Node):
    def __init__(self):
        self.input1 = None
        self.input2 = None

    def __call__(self, input1, input2):
        self.input1 = input1
        self.input2 = input2
        output = Node(input1.data @ input2.data)
        # print(f"TensorMatMul: {input1.data} @ {input2.data} = {output.data}")

        def backward_step(grad_output):
            self.input1.backward(grad_output @ self.input2.data.T)
            self.input2.backward(self.input1.data.T @ grad_output)

        output.backward_step = backward_step
        return output

    def backward_step(self, grad_output=None):
        pass



In [8]:
# TEST 1

x = Node(numpy.array([1]))
print(x)
x.backward()
print(x)

answer['init'] = []

for inp in inputs1:
    x = Node(inp['x'][0])

    x.backward()

    answer['init'].append(np.array(x.grad, dtype=np.int64))
    print(x)

Node(data=[1.], grad=[0.])
Node(data=[1.], grad=[1.])
Node(data=-1.0, grad=1.0)
Node(data=4.0, grad=1.0)
Node(data=-4.0, grad=1.0)
Node(data=8.0, grad=1.0)
Node(data=9.0, grad=1.0)
Node(data=5.0, grad=1.0)
Node(data=1.0, grad=1.0)
Node(data=8.0, grad=1.0)
Node(data=6.0, grad=1.0)
Node(data=-6.0, grad=1.0)
Node(data=-5.0, grad=1.0)
Node(data=6.0, grad=1.0)
Node(data=-10.0, grad=1.0)
Node(data=3.0, grad=1.0)
Node(data=-5.0, grad=1.0)
Node(data=-7.0, grad=1.0)
Node(data=-3.0, grad=1.0)
Node(data=9.0, grad=1.0)
Node(data=-6.0, grad=1.0)
Node(data=-10.0, grad=1.0)
Node(data=-2.0, grad=1.0)
Node(data=-2.0, grad=1.0)
Node(data=8.0, grad=1.0)
Node(data=-5.0, grad=1.0)
Node(data=-1.0, grad=1.0)
Node(data=-2.0, grad=1.0)
Node(data=6.0, grad=1.0)
Node(data=9.0, grad=1.0)
Node(data=-10.0, grad=1.0)
Node(data=2.0, grad=1.0)
Node(data=2.0, grad=1.0)
Node(data=1.0, grad=1.0)
Node(data=-8.0, grad=1.0)
Node(data=1.0, grad=1.0)
Node(data=-2.0, grad=1.0)
Node(data=7.0, grad=1.0)
Node(data=-10.0, grad=1.0

In [4]:
# TEST 2

x = Node(numpy.array([1]))
y = Node(numpy.array([2]))

z = x + y + x + x + x + y

print(z)
z.backward()
print(x, y)

answer['sum'] = []
for inp in inputs1:
    x = Node(inp['x'][0])
    y = Node(inp['x'][1])

    z = x + y + x + x + x + y
    z.backward()

    answer['sum'].append(x.grad)

TensorSum: [1.] + [2.] = [3.]
TensorSum: [3.] + [1.] = [4.]
TensorSum: [4.] + [1.] = [5.]
TensorSum: [5.] + [1.] = [6.]
TensorSum: [6.] + [2.] = [8.]
Node(data=[8.], grad=[0.])
Node(data=[1.], grad=[4.]) Node(data=[2.], grad=[2.])
TensorSum: -1.0 + 4.0 = 3.0
TensorSum: 3.0 + -1.0 = 2.0
TensorSum: 2.0 + -1.0 = 1.0
TensorSum: 1.0 + -1.0 = 0.0
TensorSum: 0.0 + 4.0 = 4.0
TensorSum: 4.0 + -3.0 = 1.0
TensorSum: 1.0 + 4.0 = 5.0
TensorSum: 5.0 + 4.0 = 9.0
TensorSum: 9.0 + 4.0 = 13.0
TensorSum: 13.0 + -3.0 = 10.0
TensorSum: -4.0 + -3.0 = -7.0
TensorSum: -7.0 + -4.0 = -11.0
TensorSum: -11.0 + -4.0 = -15.0
TensorSum: -15.0 + -4.0 = -19.0
TensorSum: -19.0 + -3.0 = -22.0
TensorSum: 8.0 + -5.0 = 3.0
TensorSum: 3.0 + 8.0 = 11.0
TensorSum: 11.0 + 8.0 = 19.0
TensorSum: 19.0 + 8.0 = 27.0
TensorSum: 27.0 + -5.0 = 22.0
TensorSum: 9.0 + -5.0 = 4.0
TensorSum: 4.0 + 9.0 = 13.0
TensorSum: 13.0 + 9.0 = 22.0
TensorSum: 22.0 + 9.0 = 31.0
TensorSum: 31.0 + -5.0 = 26.0
TensorSum: 5.0 + -8.0 = -3.0
TensorSum: -3.0 

In [54]:
# TEST 3

x = Node(numpy.array([1]))
y = Node(numpy.array([2]))

z = x - y + x + x - x - y

print(z)
z.backward()
print(x, y)

answer['diff'] = []
for inp in inputs1:
    x = Node(inp['x'][0])
    y = Node(inp['x'][1])

    z = x - y + x + x - y - y
    z.backward()

    answer['diff'].append(x.grad)

TensorSub: [1.] - [2.] = [-1.]
TensorSum: [-1.] + [1.] = [0.]
TensorSum: [0.] + [1.] = [1.]
TensorSub: [1.] - [1.] = [0.]
TensorSub: [0.] - [2.] = [-2.]
Node(data=[-2.], grad=[0.])
Node(data=[1.], grad=[2.]) Node(data=[2.], grad=[-2.])
TensorSub: -1.0 - 4.0 = -5.0
TensorSum: -5.0 + -1.0 = -6.0
TensorSum: -6.0 + -1.0 = -7.0
TensorSub: -7.0 - 4.0 = -11.0
TensorSub: -11.0 - 4.0 = -15.0
TensorSub: 4.0 - -3.0 = 7.0
TensorSum: 7.0 + 4.0 = 11.0
TensorSum: 11.0 + 4.0 = 15.0
TensorSub: 15.0 - -3.0 = 18.0
TensorSub: 18.0 - -3.0 = 21.0
TensorSub: -4.0 - -3.0 = -1.0
TensorSum: -1.0 + -4.0 = -5.0
TensorSum: -5.0 + -4.0 = -9.0
TensorSub: -9.0 - -3.0 = -6.0
TensorSub: -6.0 - -3.0 = -3.0
TensorSub: 8.0 - -5.0 = 13.0
TensorSum: 13.0 + 8.0 = 21.0
TensorSum: 21.0 + 8.0 = 29.0
TensorSub: 29.0 - -5.0 = 34.0
TensorSub: 34.0 - -5.0 = 39.0
TensorSub: 9.0 - -5.0 = 14.0
TensorSum: 14.0 + 9.0 = 23.0
TensorSum: 23.0 + 9.0 = 32.0
TensorSub: 32.0 - -5.0 = 37.0
TensorSub: 37.0 - -5.0 = 42.0
TensorSub: 5.0 - -8.0 = 1

In [55]:
# TEST 4

x = Node(numpy.array([1]))
y = Node(numpy.array([2]))

z = (x + y) * (x - y)

print(z)
z.backward()
print(x, y)

answer['mul'] = []
for inp in inputs1:
    x = Node(inp['x'][0])
    y = Node(inp['x'][1])

    z = (x + y) * (x - y) * (x + x + y)
    z.backward()

    answer['mul'].append(x.grad)

TensorSum: [1.] + [2.] = [3.]
TensorSub: [1.] - [2.] = [-1.]
TensorMul: [3.] * [-1.] = [-3.]
Node(data=[-3.], grad=[0.])
Node(data=[1.], grad=[2.]) Node(data=[2.], grad=[-4.])
TensorSum: -1.0 + 4.0 = 3.0
TensorSub: -1.0 - 4.0 = -5.0
TensorMul: 3.0 * -5.0 = -15.0
TensorSum: -1.0 + -1.0 = -2.0
TensorSum: -2.0 + 4.0 = 2.0
TensorMul: -15.0 * 2.0 = -30.0
TensorSum: 4.0 + -3.0 = 1.0
TensorSub: 4.0 - -3.0 = 7.0
TensorMul: 1.0 * 7.0 = 7.0
TensorSum: 4.0 + 4.0 = 8.0
TensorSum: 8.0 + -3.0 = 5.0
TensorMul: 7.0 * 5.0 = 35.0
TensorSum: -4.0 + -3.0 = -7.0
TensorSub: -4.0 - -3.0 = -1.0
TensorMul: -7.0 * -1.0 = 7.0
TensorSum: -4.0 + -4.0 = -8.0
TensorSum: -8.0 + -3.0 = -11.0
TensorMul: 7.0 * -11.0 = -77.0
TensorSum: 8.0 + -5.0 = 3.0
TensorSub: 8.0 - -5.0 = 13.0
TensorMul: 3.0 * 13.0 = 39.0
TensorSum: 8.0 + 8.0 = 16.0
TensorSum: 16.0 + -5.0 = 11.0
TensorMul: 39.0 * 11.0 = 429.0
TensorSum: 9.0 + -5.0 = 4.0
TensorSub: 9.0 - -5.0 = 14.0
TensorMul: 4.0 * 14.0 = 56.0
TensorSum: 9.0 + 9.0 = 18.0
TensorSum: 1

In [56]:
# TEST 5

x = Node(numpy.array([1]))
y = Node(numpy.array([2]))

print(z)
z.backward()
print(x, y)

answer['div'] = []
for inp in inputs1:
    x = Node(inp['x'][0])
    y = Node(inp['x'][1])

    z = x / (Node(0.5) + y)
    z.backward()

    answer['div'].append(x.grad)

Node(data=65.0, grad=1.0)
Node(data=[1.], grad=[0.]) Node(data=[2.], grad=[0.])
TensorSum: 0.5 + 4.0 = 4.5
TensorDiv: -1.0 / 4.5 = -0.2222222222222222
TensorSum: 0.5 + -3.0 = -2.5
TensorDiv: 4.0 / -2.5 = -1.6
TensorSum: 0.5 + -3.0 = -2.5
TensorDiv: -4.0 / -2.5 = 1.6
TensorSum: 0.5 + -5.0 = -4.5
TensorDiv: 8.0 / -4.5 = -1.7777777777777777
TensorSum: 0.5 + -5.0 = -4.5
TensorDiv: 9.0 / -4.5 = -2.0
TensorSum: 0.5 + -8.0 = -7.5
TensorDiv: 5.0 / -7.5 = -0.6666666666666666
TensorSum: 0.5 + -10.0 = -9.5
TensorDiv: 1.0 / -9.5 = -0.10526315789473684
TensorSum: 0.5 + -5.0 = -4.5
TensorDiv: 8.0 / -4.5 = -1.7777777777777777
TensorSum: 0.5 + -1.0 = -0.5
TensorDiv: 6.0 / -0.5 = -12.0
TensorSum: 0.5 + 6.0 = 6.5
TensorDiv: -6.0 / 6.5 = -0.9230769230769231
TensorSum: 0.5 + -9.0 = -8.5
TensorDiv: -5.0 / -8.5 = 0.5882352941176471
TensorSum: 0.5 + -5.0 = -4.5
TensorDiv: 6.0 / -4.5 = -1.3333333333333333
TensorSum: 0.5 + 0.0 = 0.5
TensorDiv: -10.0 / 0.5 = -20.0
TensorSum: 0.5 + 8.0 = 8.5
TensorDiv: 3.0 / 8.5

In [57]:
# TEST 6

x = Node(numpy.array([1]))
y = Node(numpy.array([2]))

z = -x

print(z)
z.backward()
print(x)

answer['neg'] = []
for inp in inputs1:
    x = Node(inp['x'][0])

    z = -x
    z.backward()

    answer['neg'].append(x.grad)

TensorNeg: -[1.] = [-1.]
Node(data=[-1.], grad=[0.])
Node(data=[1.], grad=[-1.])
TensorNeg: --1.0 = 1.0
TensorNeg: -4.0 = -4.0
TensorNeg: --4.0 = 4.0
TensorNeg: -8.0 = -8.0
TensorNeg: -9.0 = -9.0
TensorNeg: -5.0 = -5.0
TensorNeg: -1.0 = -1.0
TensorNeg: -8.0 = -8.0
TensorNeg: -6.0 = -6.0
TensorNeg: --6.0 = 6.0
TensorNeg: --5.0 = 5.0
TensorNeg: -6.0 = -6.0
TensorNeg: --10.0 = 10.0
TensorNeg: -3.0 = -3.0
TensorNeg: --5.0 = 5.0
TensorNeg: --7.0 = 7.0
TensorNeg: --3.0 = 3.0
TensorNeg: -9.0 = -9.0
TensorNeg: --6.0 = 6.0
TensorNeg: --10.0 = 10.0
TensorNeg: --2.0 = 2.0
TensorNeg: --2.0 = 2.0
TensorNeg: -8.0 = -8.0
TensorNeg: --5.0 = 5.0
TensorNeg: --1.0 = 1.0
TensorNeg: --2.0 = 2.0
TensorNeg: -6.0 = -6.0
TensorNeg: -9.0 = -9.0
TensorNeg: --10.0 = 10.0
TensorNeg: -2.0 = -2.0
TensorNeg: -2.0 = -2.0
TensorNeg: -1.0 = -1.0
TensorNeg: --8.0 = 8.0
TensorNeg: -1.0 = -1.0
TensorNeg: --2.0 = 2.0
TensorNeg: -7.0 = -7.0
TensorNeg: --10.0 = 10.0
TensorNeg: --4.0 = 4.0
TensorNeg: -1.0 = -1.0
TensorNeg: --3

In [58]:
# TEST 7

x = Node(numpy.array([1]))
y = Node(numpy.array([2]))

z = (x + y).exp()

print(z)
print(x.grad, y.grad)

z.backward()

print(x.grad, y.grad)

answer['exp'] = []
for inp in inputs1:
    x = Node(inp['x'][0])

    z = x.exp()
    z.backward()

    answer['exp'].append(x.grad)

TensorSum: [1.] + [2.] = [3.]
TensorExp: exp([3.]) = [20.08553692]
Node(data=[20.08553692], grad=[0.])
[0.] [0.]
[20.08553692] [20.08553692]
TensorExp: exp(-1.0) = 0.36787944117144233
TensorExp: exp(4.0) = 54.598150033144236
TensorExp: exp(-4.0) = 0.01831563888873418
TensorExp: exp(8.0) = 2980.9579870417283
TensorExp: exp(9.0) = 8103.083927575384
TensorExp: exp(5.0) = 148.4131591025766
TensorExp: exp(1.0) = 2.718281828459045
TensorExp: exp(8.0) = 2980.9579870417283
TensorExp: exp(6.0) = 403.4287934927351
TensorExp: exp(-6.0) = 0.0024787521766663585
TensorExp: exp(-5.0) = 0.006737946999085467
TensorExp: exp(6.0) = 403.4287934927351
TensorExp: exp(-10.0) = 4.5399929762484854e-05
TensorExp: exp(3.0) = 20.085536923187668
TensorExp: exp(-5.0) = 0.006737946999085467
TensorExp: exp(-7.0) = 0.0009118819655545162
TensorExp: exp(-3.0) = 0.049787068367863944
TensorExp: exp(9.0) = 8103.083927575384
TensorExp: exp(-6.0) = 0.0024787521766663585
TensorExp: exp(-10.0) = 4.5399929762484854e-05
TensorEx

# Task

Implement Graph function from the previous task. For the constants please use `Node(1)` whenever needed (otherwise you will get an error)

In [59]:
# TEST 8 (Graph)

def sigmoid(z):
    return Node(1.0) / (Node(1.0) + (-z).exp())


def tanh(z):
    exp_z = z.exp()
    exp_neg_z = (-z).exp()
    return (exp_z - exp_neg_z) / (exp_z + exp_neg_z)

def graph_value(x, w):

    x1, x2 = x
    b1, b2, c1, c2 = w

    z = []
    z.append(x1 + b1)
    z.append(x2 + b2)

    z.append(sigmoid(z[0]))
    z.append(sigmoid(z[1])) 
    z.append(tanh(z[3]))

    z.append(z[4] * c2)
    z.append(z[0] * z[2])
    z.append(z[6] * c1)
    z.append(z[2] * z[5])

    y = z[7] + z[8]
    
    return y

answer['graph'] = []
for inp in inputs1:

    x = inp['x']
    w = inp['w']

    x = [Node(float(val)) for val in x]
    w = [Node(float(val)) for val in w]

    y = graph_value(x, w)
    y.backward()

    answer['graph'].append([x[0].grad, x[1].grad, w[0].grad, w[1].grad, w[2].grad, w[3].grad])

print("OUR RESULTS:")
print(x[0].grad, x[1].grad, w[0].grad, w[1].grad, w[2].grad, w[3].grad)

def torch_graph_value(x, w):
    x1, x2 = x
    b1, b2, c1, c2 = w

    z0 = x1 + b1
    z1 = x2 + b2

    z2 = torch.sigmoid(z0)
    z3 = torch.sigmoid(z1)
    z4 = torch.tanh(z3)

    z5 = z4 * c2
    z6 = z0 * z2
    z7 = z6 * c1
    z8 = z2 * z5

    y = z7 + z8

    return y

import torch
x = torch.tensor(inp['x'], requires_grad=True, dtype=float)
w = torch.tensor(inp['w'], requires_grad=True, dtype=float)

y = torch_graph_value(x, w)
y.backward()

print("TORCH RESULTS:")
print(x.grad, w.grad)
    

TensorSum: -1.0 + -7.0 = -8.0
TensorSum: 4.0 + -6.0 = -2.0
TensorNeg: --8.0 = 8.0
TensorExp: exp(8.0) = 2980.9579870417283
TensorSum: 1.0 + 2980.9579870417283 = 2981.9579870417283
TensorDiv: 1.0 / 2981.9579870417283 = 0.0003353501304664781
TensorNeg: --2.0 = 2.0
TensorExp: exp(2.0) = 7.38905609893065
TensorSum: 1.0 + 7.38905609893065 = 8.38905609893065
TensorDiv: 1.0 / 8.38905609893065 = 0.11920292202211755
TensorExp: exp(0.11920292202211755) = 1.1265985067418556
TensorNeg: -0.11920292202211755 = -0.11920292202211755
TensorExp: exp(-0.11920292202211755) = 0.8876276632853164
TensorSub: 1.1265985067418556 - 0.8876276632853164 = 0.23897084345653918
TensorSum: 1.1265985067418556 + 0.8876276632853164 = 2.0142261700271717
TensorDiv: 0.23897084345653918 / 2.0142261700271717 = 0.11864151454914097
TensorMul: 0.11864151454914097 * 0.0 = 0.0
TensorMul: -8.0 * 0.0003353501304664781 = -0.002682801043731825
TensorMul: -0.002682801043731825 * 5.0 = -0.013414005218659125
TensorMul: 0.00033535013046647

# 2-layer NN

Implement 2 layer Neural Network and compute its gradient using `Node` class:

$\mathbf y = \sigma( W_2 \sigma(W_1 \mathbf x + \mathbf b_1) + \mathbf b_2)$

Return sum of all values in $y * y$ as loss function

In [41]:
# TEST 9 (Two-layer net)

def two_layer_net(x, W1, W2, b1, b2):
    h = (W1 @ x) + b1
    h = sigmoid(h)
    y = (W2 @ h) + b2
    y = sigmoid(y)
    return y.sum()



answer['two_layer_net'] = []
for inp in inputs2:
    x = Node(inp['x'])
    W1 = Node(inp['W1'])
    W2 = Node(inp['W2'])
    b1 = Node(inp['b1'])
    b2 = Node(inp['b2'])

    h_hat = two_layer_net(x, W1, W2, b1, b2)
    h_hat.backward()

    answer['two_layer_net'].append([x.grad, W1.grad, W2.grad, b1.grad, b2.grad])


# Conclusion

You have implemented a backpropagation algorithm. This algorithm is similar to one that is used in Torch. Note that you have implemented all the mechanics of it. Thus it should be now not a magical box: you know exactly how it works.

In [31]:
json_tricks.dump(answer, '.answer.json')

'{"init": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "sum": [4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 