In [1]:
import numpy as np
import time

In [2]:
class Var:
    def __init__(self, value, grad = None):
        self.value = value
        if grad == None:
            self.grad = np.zeros(value.shape)
        else:
            self.grad = grad

In [3]:
class PerceptronGate:
    def __init__(self):
        self.x = None
        self.y = None
        self.z = None
    def sigmoid(x):
        return 1/(1+np.exp(-x))
    def forward(self, x, y):
        self.x = x
        self.y = y
        dotProd = self.x.value.dot(self.y.value)
        s = PerceptronGate.sigmoid(dotProd)
        self.z = Var(s, 0.0)
        return self.z
    def backward(self):
        s = self.z.value
        self.x.grad += self.y.value * s * (1 - s) * self.z.grad
        self.y.grad += self.x.value * s * (1 - s) * self.z.grad

In [4]:
class LossGate:
    def __init__(self):
        self.x = None
        self.y = None
        self.z = None
    def forward(self, x, y):
        self.x = x
        self.y = y
        self.z = Var(0.5*(self.x.value-self.y.value)**2, 0.0)
        return self.z
    def backward(self):
        self.x.grad += (self.x.value-self.y.value) * self.z.grad
        self.y.grad += -1.0*(self.x.value-self.y.value) * self.z.grad

In [5]:
perceptron0_0 = PerceptronGate()
perceptron0_1 = PerceptronGate()
perceptron0_2 = PerceptronGate()
perceptron1_0 = PerceptronGate()
loss = LossGate()

w0_0 = Var(np.random.randn(10))
w0_1 = Var(np.random.randn(10))
w0_2 = Var(np.random.randn(10))
w1_0 = Var(np.random.randn(3))

x = Var(np.random.randn(10))
y = Var(1, 0)

In [6]:
# forward pass
def forwardNetwork():
    p0_0 = perceptron0_0.forward(w0_0,x)
    p0_1 = perceptron0_1.forward(w0_1,x)
    p0_2 = perceptron0_2.forward(w0_2,x)
    p0 = Var(np.array([p0_0.value, p0_1.value, p0_2.value]))
    p = perceptron1_0.forward(p0, w1_0)
    return loss.forward(p,y)

In [7]:
# backward pass
def backwardNetwork(output):
    output.grad = 1.0;
    loss.backward()
    perceptron1_0.backward()
    perceptron0_0.z.grad = perceptron1_0.x.grad[0]
    perceptron0_0.backward()
    perceptron0_1.z.grad = perceptron1_0.x.grad[1]
    perceptron0_1.backward()
    perceptron0_2.z.grad = perceptron1_0.x.grad[2]
    perceptron0_2.backward()

In [8]:
# gradient descent
step_size = 0.01;
s = forwardNetwork()
#print(s.value)
while s.value > 1e-3:
    backwardNetwork(s)
    w0_0.value -= step_size * w0_0.grad
    w0_1.value -= step_size * w0_1.grad
    w0_2.value -= step_size * w0_2.grad
    w1_0.value -= step_size * w1_0.grad
    s = forwardNetwork()
    print('current loss: ' + str(s.value))
    time.sleep(0.25)

current loss: 0.14437738409625037
current loss: 0.14397138571705356
current loss: 0.14336261404256898
current loss: 0.14255135077498238
current loss: 0.14153798967390235
current loss: 0.14032305302198575
current loss: 0.13890721337775003
current loss: 0.13729132145843864
current loss: 0.13547644121953703
current loss: 0.13346389344042925
current loss: 0.13125530937918778
current loss: 0.12885269630664545
current loss: 0.12625851694218818
current loss: 0.12347578494717185
current loss: 0.12050817862307131
current loss: 0.11736017472384418
current loss: 0.11403720371393462
current loss: 0.11054582675007682
current loss: 0.10689393298723832
current loss: 0.1030909533619423
current loss: 0.09914808368362509
current loss: 0.09507850565085414
current loss: 0.09089758944861054
current loss: 0.08662305626092846
current loss: 0.08227507405041844
current loss: 0.07787625636297674
current loss: 0.0734515330639196
current loss: 0.06902786527348317
current loss: 0.06463378559782708
current loss: 0.