In [884]:
import numpy as np
import random
from sklearn import preprocessing

In [562]:
class Node:
    """Base class for a vertex in the computation graph.

    Attributes:
        inputs:   list of upstream nodes this node reads from.
        outputs:  list of downstream nodes that consume this node; filled in
                  automatically when a consumer is constructed.
        value:    result of the most recent forward pass (``None`` until set).
        gradient: dict mapping a node to the gradient of the cost with respect
                  to that node, as computed by this node's ``backward``.
    """

    def __init__(self, inputs=None):
        # A fresh list per instance: a mutable default argument would be
        # shared by every node constructed without explicit inputs.
        self.inputs = [] if inputs is None else list(inputs)
        self.value = None
        self.outputs = []

        # Register this node as a consumer of each of its inputs.
        for n in self.inputs:
            n.outputs.append(self)

        self.gradient = {}

    def forward(self):
        """Compute ``self.value`` from the inputs; subclasses must override."""
        # NotImplemented is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError in Python 3.
        raise NotImplementedError

    def backward(self):
        """Fill ``self.gradient`` for the inputs; subclasses must override."""
        raise NotImplementedError

In [618]:
class Input(Node):
    """Leaf node whose ``value`` is assigned from outside the graph.

    Used both for data placeholders and for trainable parameters.
    """

    def __init__(self):
        Node.__init__(self)

    def forward(self):
        """Return the externally assigned value unchanged."""
        return self.value

    def backward(self):
        """Store the cost gradient for this node under ``self.gradient[self]``.

        Only the first consumer (``self.outputs[0]``) is consulted, matching
        how the other node types in this file read their upstream gradient.
        """
        grad_cost = self.outputs[0].gradient[self]
        # The original stored the bound method ``grad_cost.reshape`` (never
        # called), so the "gradient" was not an array. Call it with the shape
        # of this node's value so the gradient can be subtracted from the
        # value directly.
        if self.value is not None:
            grad_cost = grad_cost.reshape(np.shape(self.value))
        self.gradient[self] = grad_cost

In [564]:
class Add(Node):
    """Node that sums the values of all its input nodes.

    No ``backward`` is defined here, so calling it falls through to
    ``Node.backward``.
    """

    def __init__(self, nodes):
        Node.__init__(self, nodes)

    def forward(self):
        """Sum the input values, cache the result in ``self.value`` and return it."""
        # The original assigned to ``self.values`` (typo), leaving
        # ``self.value`` as None for any downstream consumer, and returned
        # nothing, unlike the other nodes' forward methods.
        self.value = sum(n.value for n in self.inputs)
        return self.value

In [1513]:
class Linear(Node):
    """Affine node computing ``dot(x, weight) + bias``.

    The three inputs are stored in the order (data, weight, bias); each is a
    node whose ``value`` is read during the forward and backward passes.
    """

    def __init__(self, nodes, weight, bias):
        Node.__init__(self, [nodes, weight, bias])

    def forward(self):
        """Compute the affine output, cache it in ``self.value`` and return it."""
        data_node, weight_node, bias_node = self.inputs
        data = data_node.value
        kernel = weight_node.value
        offset = bias_node.value

        self.value = np.dot(data, kernel) + offset
        return self.value

    def backward(self):
        """Propagate the upstream gradient to the data, weight and bias inputs.

        The upstream gradient is taken from the first consumer only
        (``self.outputs[0]``).
        """
        upstream = self.outputs[0].gradient[self]

        data_node, weight_node, bias_node = self.inputs
        data = data_node.value
        kernel = weight_node.value

        # d(cost)/d(x) = upstream . W^T
        self.gradient[data_node] = np.dot(upstream, kernel.T)
        # d(cost)/d(W) = x^T . upstream
        self.gradient[weight_node] = np.dot(data.T, upstream)
        # The bias is added to every row, so its gradient is the upstream
        # gradient summed over axis 0 (presumably the sample axis).
        self.gradient[bias_node] = np.sum(upstream, axis=0, keepdims=False)

In [1474]:
class Sigmod(Node):
    """Element-wise logistic sigmoid activation applied to a single input node."""

    def __init__(self, nodes):
        Node.__init__(self, [nodes])

    def _sigmod(self, x):
        """Return ``1 / (1 + exp(-x))`` evaluated element-wise."""
        return 1.0 / (1.0 + np.exp(-x))

    def forward(self):
        """Apply the sigmoid to the input value, cache and return the result."""
        source = self.inputs[0]
        self.value = self._sigmod(source.value)
        return self.value

    def backward(self):
        """Store the cost gradient with respect to the input node.

        Uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)); the
        upstream gradient is read from the first consumer only.
        """
        source = self.inputs[0]
        pre_activation = source.value
        upstream = self.outputs[0].gradient[self]

        activation = self._sigmod(pre_activation)
        self.gradient[source] = upstream * activation * (1 - activation)

In [1475]:
class MSE(Node):
    """Mean-squared-error cost node over a target and a prediction node."""

    def __init__(self, y, y_hat):
        Node.__init__(self, [y, y_hat])

    def forward(self):
        """Compute the mean of the squared differences and return it.

        Both input values are reshaped to column vectors of shape (-1, 1)
        before subtraction; the difference is cached in ``self.diff`` for the
        backward pass.
        """
        target, prediction = (node.value.reshape(-1, 1) for node in self.inputs)
        self.diff = target - prediction

        self.value = (self.diff * self.diff).mean()
        return self.value

    def backward(self):
        """Store the gradient of the cost for both inputs.

        With ``m`` the number of rows in ``self.diff``, the gradient is
        ``2/m * diff`` for the target and ``-2/m * diff`` for the prediction.
        """
        target_node, prediction_node = self.inputs
        count = len(self.diff)
        scale = 2 / count

        self.gradient[target_node] = scale * self.diff
        self.gradient[prediction_node] = -scale * self.diff

In [1514]:
# Placeholders for the features and the targets.
x = Input()
y = Input()

# Parameters of the first (hidden) linear layer.
w1 = Input()
b1 = Input()

# Parameters of the second (output) linear layer.
w2 = Input()
b2 = Input()

# Network: Linear -> Sigmod -> Linear.
l1 = Linear(nodes=x, weight=w1, bias=b1)
s1 = Sigmod(nodes=l1)
l2 = Linear(nodes=s1, weight=w2, bias=b2)

# Cost: mean squared error between the targets and the network output.
mse = MSE(y=y, y_hat=l2)

In [1515]:
from sklearn.datasets import load_boston

In [1516]:
# Load the dataset bundle returned by sklearn.datasets.load_boston.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the installed version, or switch data source, before relying
# on a fresh "Restart & Run All".
data = load_boston()

In [1517]:
# Pull the feature matrix and the regression targets out of the bundle.
train_x, train_y = data['data'], data['target']

In [1518]:
# Rescale the features with sklearn's preprocessing.scale (presumably
# per-column standardisation -- see the sklearn docs). This rebinds train_x,
# so the raw features are no longer reachable under that name after this cell.
train_x = preprocessing.scale(train_x)

In [1519]:
# Feed the whole training set into the data placeholders.
x.value, y.value = train_x, train_y

In [1520]:
# Draw the initial weights from a dedicated, seeded generator so that a fresh
# "Restart & Run All" starts from the same parameters every time (the original
# used the unseeded global generator).
rng = np.random.RandomState(0)

# Layer sizes: the input width follows the data instead of a hard-coded 13.
n_features = train_x.shape[1]
n_hidden = 10

w1.value = rng.randn(n_features, n_hidden)
b1.value = np.zeros(n_hidden)
w2.value = rng.randn(n_hidden, 1)
b2.value = np.zeros(1)

In [1521]:
# Nodes in forward-evaluation order (every node appears after its inputs).
# fit() updates the entries at positions 1-4, so w1, b1, w2, b2 must stay
# there. y is not listed; MSE reads y.value directly.
graph_list = [
    x,
    w1, b1,
    w2, b2,
    l1, s1, l2,
    mse,
]

In [1525]:
def fit(graph, learn_rate, epochs=5000, trainables=None, log_every=100):
    """Train the graph with full-batch gradient descent.

    Each epoch runs ``forward`` on every node in order, ``backward`` on every
    node in reverse order, and then updates each trainable node in place with
    ``value -= learn_rate * gradient[node]``.

    Args:
        graph: list of nodes in forward-evaluation order; the return value of
            the last node's ``forward`` is treated as the loss.
        learn_rate: step size of the gradient-descent update.
        epochs: number of passes to run (defaults to the original 5000).
        trainables: nodes to update. Defaults to ``graph[1:5]``, the positions
            the original implementation hard-coded.
        log_every: print the loss every this many epochs; a falsy value
            disables printing.

    Returns:
        The loss from the last forward pass, or the initial placeholder ``1``
        when ``epochs`` is 0.
    """
    if trainables is None:
        trainables = graph[1:5]

    loss = 1

    for i in range(epochs):
        # Forward pass; the last node evaluated provides the loss.
        for node in graph:
            loss = node.forward()

        # Backward pass in reverse order so each node finds its consumers'
        # gradients already computed.
        for node in reversed(graph):
            node.backward()

        # Each trainable node stores its own gradient under itself.
        for param in trainables:
            param.value -= learn_rate * param.gradient[param]

        if log_every and i % log_every == 0:
            print("loss is {}".format(loss))
    return loss

In [1527]:
# Train with a learning rate of 0.01. fit() prints the loss every 100
# iterations, and because the call is the cell's last expression the notebook
# also echoes the returned loss. The parameter arrays are updated in place, so
# re-running this cell continues training from the current weights.
fit(graph_list, 0.01)

loss is 5.418679462076831
loss is 5.392494438988442
loss is 5.366717397618454
loss is 5.341413477428093
loss is 5.316638508772757
loss is 5.292433261823772
loss is 5.268821122558174
loss is 5.245808697053098
loss is 5.223388304024107
loss is 5.201541288090174
loss is 5.180241356281748
loss is 5.159457478962694
loss is 5.1391561663595065
loss is 5.11930309336778
loss is 5.099864118120434
loss is 5.08080576639415
loss is 5.062095278470086
loss is 5.0437003773309526
loss is 5.025589052556079
loss is 5.007729885327553
loss is 4.990093721365196
loss is 4.9726575569258635
loss is 4.955410638140654
loss is 4.938360306092909
loss is 4.921532520723938
loss is 4.904964040272249
loss is 4.888690945010128
loss is 4.872742001120388
loss is 4.857139231232892
loss is 4.841901213760457
loss is 4.827044705437071
loss is 4.812583647870625
loss is 4.7985268885489925
loss is 4.784876310850413
loss is 4.7716263564612955
loss is 4.758764969275862
loss is 4.746275374538638
loss is 4.734138007440245
loss is 4

4.600098235170938