In [21]:
import numpy as np
import random
from sklearn import preprocessing

In [22]:
class Node:
    """Base class for a vertex of the computation graph.

    Attributes:
        inputs: nodes whose values this node consumes.
        outputs: nodes that consume this node (filled in as consumers are built).
        value: result of the most recent forward pass (None until computed/assigned).
        gradient: dict mapping a node to the gradient computed with respect to it.
    """

    def __init__(self, inputs=None):
        """Wire the node into the graph.

        Args:
            inputs: iterable of upstream nodes; None (the default) means no inputs.
        """
        # A literal [] default would be one list shared by every Node built
        # without arguments; create a fresh list per instance instead.
        self.inputs = list(inputs) if inputs is not None else []
        self.value = None
        self.outputs = []

        # Register this node as a consumer of each of its inputs.
        for n in self.inputs:
            n.outputs.append(self)

        self.gradient = {}

    def forward(self):
        """Compute self.value from the inputs; subclasses must override."""
        # NotImplemented is a comparison sentinel, not an exception: raising it
        # produces a TypeError in Python 3. NotImplementedError is the right type.
        raise NotImplementedError

    def backward(self):
        """Populate self.gradient; subclasses must override."""
        raise NotImplementedError

In [41]:
class Input(Node):
    """Leaf node of the graph: its ``value`` is assigned from outside.

    Used both for data placeholders and for trainable parameters.
    """

    def __init__(self):
        Node.__init__(self)

    def forward(self):
        """Return the externally assigned value unchanged."""
        return self.value

    def backward(self):
        """Store the gradient of the cost with respect to this node under ``self``.

        The contributions of every consumer are summed; reading only the first
        consumer would silently drop gradients when the node feeds several nodes.

        Raises:
            IndexError: if the node has no consumers.
        """
        upstream = [out.gradient[self] for out in self.outputs]
        # Indexing (rather than sum(..., 0)) keeps the IndexError for a node
        # without consumers and avoids mixing an int 0 into the result.
        grad_cost = upstream[0]
        for extra in upstream[1:]:
            # Out-of-place add so a consumer's stored gradient is never mutated.
            grad_cost = grad_cost + extra
        self.gradient[self] = grad_cost

In [24]:
class Add(Node):
    """Element-wise sum of the values of all input nodes."""

    def __init__(self, nodes):
        Node.__init__(self, nodes)

    def forward(self):
        """Sum the input values, store the result in ``self.value`` and return it."""
        # The result must go to ``value`` (the attribute every other node reads),
        # not ``values``; otherwise downstream nodes see None.
        self.value = sum(n.value for n in self.inputs)

        return self.value

    def backward(self):
        """Pass the upstream gradient through to every input.

        d(sum)/d(input) is 1, so each input receives the summed gradient of all
        consumers. Assumes the inputs share the output's shape (no broadcasting)
        -- TODO confirm for callers that mix shapes.
        """
        upstream = [out.gradient[self] for out in self.outputs]
        grad_cost = upstream[0]
        for extra in upstream[1:]:
            grad_cost = grad_cost + extra

        for n in self.inputs:
            self.gradient[n] = grad_cost

In [25]:
class Linear(Node):
    """Affine layer computing ``np.dot(x, weight) + bias``."""

    def __init__(self, nodes, weight, bias):
        """Wire the layer.

        Args:
            nodes: node providing the layer input x.
            weight: node holding the weight matrix.
            bias: node holding the bias vector.
        """
        Node.__init__(self, [nodes, weight, bias])

    def forward(self):
        """Compute, store and return ``np.dot(x, weight) + bias``."""
        x = self.inputs[0].value
        weight = self.inputs[1].value
        bias = self.inputs[2].value

        self.value = np.dot(x, weight) + bias

        return self.value

    def backward(self):
        """Compute gradients with respect to x, weight and bias.

        The upstream gradient is the sum over every consumer of this node, so a
        layer whose output feeds several nodes receives all contributions.
        Assumes 2-D x and weight with samples along axis 0 -- TODO confirm for
        other layouts.
        """
        upstream = [out.gradient[self] for out in self.outputs]
        grad_cost = upstream[0]
        for extra in upstream[1:]:
            # Out-of-place add so a consumer's stored gradient is never mutated.
            grad_cost = grad_cost + extra

        x = self.inputs[0].value
        weight = self.inputs[1].value

        self.gradient[self.inputs[0]] = np.dot(grad_cost, weight.T)
        self.gradient[self.inputs[1]] = np.dot(x.T, grad_cost)
        # The bias is added to every row, so its gradient is the sum over axis 0.
        self.gradient[self.inputs[2]] = np.sum(grad_cost, axis=0, keepdims=False)

In [26]:
class Sigmod(Node):
    """Element-wise logistic sigmoid activation."""

    def __init__(self, nodes):
        Node.__init__(self, [nodes])

    def _sigmod(self, x):
        """Return 1 / (1 + exp(-x)) element-wise."""
        return 1.0 / (1.0 + np.exp(-x))

    def forward(self):
        """Apply the sigmoid to the input value, store it and return it."""
        self.value = self._sigmod(self.inputs[0].value)

        return self.value

    def backward(self):
        """Compute the gradient with respect to the input.

        Uses sigmoid'(x) = s * (1 - s) and the summed gradient of every
        consumer of this node.
        """
        upstream = [out.gradient[self] for out in self.outputs]
        grad_cost = upstream[0]
        for extra in upstream[1:]:
            # Out-of-place add so a consumer's stored gradient is never mutated.
            grad_cost = grad_cost + extra

        # Evaluate the sigmoid once instead of twice.
        s = self._sigmod(self.inputs[0].value)

        self.gradient[self.inputs[0]] = grad_cost * s * (1 - s)

In [27]:
class MSE(Node):
    """Mean-squared-error cost between a target node and a prediction node."""

    def __init__(self, y, y_hat):
        """Wire the cost node: ``y`` is the target, ``y_hat`` the prediction."""
        Node.__init__(self, [y, y_hat])

    def forward(self):
        """Store the residual in ``self.diff`` and return the mean of its square."""
        # Both operands are flattened to column vectors before subtracting.
        target, prediction = (node.value.reshape(-1, 1) for node in self.inputs)
        self.diff = target - prediction

        self.value = np.square(self.diff).mean()

        return self.value

    def backward(self):
        """Fill in the gradient for both inputs; requires a prior forward()."""
        target_node, prediction_node = self.inputs
        scale = 2 / self.diff.shape[0]
        self.gradient[target_node] = scale * self.diff
        self.gradient[prediction_node] = -scale * self.diff

In [42]:
# Placeholders for the data ...
x = Input()
y = Input()
# ... and for the parameters of the two linear layers.
w1 = Input()
b1 = Input()
w2 = Input()
b2 = Input()

# Network: linear -> sigmoid -> linear.
l1 = Linear(x, w1, b1)
s1 = Sigmod(l1)
l2 = Linear(s1, w2, b2)

# Cost node comparing the target y with the network output.
mse = MSE(y, l2)

In [43]:
from sklearn.datasets import load_boston

In [44]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on older scikit-learn versions -- confirm the
# pinned version or switch to another dataset loader.
data = load_boston()

In [45]:
# Pull the feature matrix and the regression target out of the loaded dataset.
train_x, train_y = data['data'], data['target']

In [46]:
# Standardise the features with sklearn's preprocessing.scale. The name train_x
# is rebound, so the unscaled features are no longer reachable through it.
train_x = preprocessing.scale(train_x)

In [47]:
# Feed the training data into the two data placeholders.
x.value, y.value = train_x, train_y

In [48]:
# Seed NumPy's global generator so that Restart & Run All reproduces the same
# initial weights and therefore the same loss curve.
np.random.seed(42)

# Derive the layer sizes instead of hard-coding the feature count.
n_features = train_x.shape[1]
n_hidden = 10

w1.value = np.random.randn(n_features, n_hidden)
b1.value = np.zeros(n_hidden)
w2.value = np.random.randn(n_hidden, 1)
b2.value = np.zeros(1)

In [49]:
# Order matters: fit() calls forward() in this order and backward() in reverse,
# and by default updates the entries at indices 1-4 (w1, b1, w2, b2).
# y is not listed; the mse node reads y.value directly.
graph_list = [x, w1, b1, w2, b2, l1, s1, l2, mse]

In [50]:
def fit(graph, learn_rate, epochs=5000, trainables=None, log_every=100):
    """Train the graph with plain gradient descent.

    Args:
        graph: nodes ordered so that every node comes after its inputs; the
            value returned by the last node's forward() is treated as the loss.
        learn_rate: step size applied to each trainable node's gradient.
        epochs: number of forward/backward/update rounds (default 5000).
        trainables: nodes whose ``value`` is updated in place from
            ``node.gradient[node]``. Defaults to ``graph[1]`` .. ``graph[4]``.
        log_every: print the loss every ``log_every`` rounds; 0 or None
            disables printing.

    Returns:
        The loss of the last forward pass (computed before the final update),
        or None when ``epochs`` is 0.
    """
    if trainables is None:
        # Explicit indexing (not a slice) so a too-short graph still fails
        # loudly instead of silently training fewer parameters.
        trainables = [graph[i] for i in (1, 2, 3, 4)]

    loss = None

    for i in range(epochs):
        # Forward pass; after the loop ``loss`` holds the last node's result.
        for node in graph:
            loss = node.forward()

        # Backward pass in reverse order so consumers run before producers.
        for node in reversed(graph):
            node.backward()

        for param in trainables:
            param.value -= learn_rate * param.gradient[param]

        if log_every and i % log_every == 0:
            print("loss is {}".format(loss))
    return loss

In [51]:
# Train with learning rate 0.01; the returned final loss is shown as the cell output.
fit(graph_list, 0.01)

loss is 571.9800381586145
loss is 28.920231637154146
loss is 22.641604772264063
loss is 19.17678455508939
loss is 15.87765334160036
loss is 13.360110457103682
loss is 11.78977485719423
loss is 10.833928899730477
loss is 10.230108820176618
loss is 9.805312325416917
loss is 9.47749790456153
loss is 9.208704952586242
loss is 8.979483782962108
loss is 8.778959449850392
loss is 8.600365525481104
loss is 8.438982457374289
loss is 8.291522376726967
loss is 8.15602308856178
loss is 8.031551348931384
loss is 7.917629686319493
loss is 7.8136634651185055
loss is 7.718675936759738
loss is 7.631402023190411
loss is 7.550522285483147
loss is 7.4748479709618
loss is 7.403414195063489
loss is 7.335502410412701
loss is 7.270610946766635
loss is 7.208383599077038
loss is 7.148521447798603
loss is 7.090722187115461
loss is 7.034671050126436
loss is 6.980066737109125
loss is 6.9266541488806945
loss is 6.874249266357528
loss is 6.822752621026139
loss is 6.772150457128175
loss is 6.722503252681803
loss is 6

6.19912436253739