# Miniflow

In [64]:
import numpy as np

In [65]:
class Node(object):
    """Base class for every node in the computation graph.

    Attributes set by the constructor:
        inbound_nodes:  nodes whose values this node consumes.
        outbound_nodes: nodes that consume this node's value.
        value:          result of the most recent forward pass (None until run).
        gradients:      dict filled in by ``backward``.
    """

    def __init__(self, inbound_nodes=None):
        """Create a node and register it as a consumer of each inbound node.

        Args:
            inbound_nodes: optional sequence of nodes feeding this one.
                Omitting it (or passing None) means "no inputs".
        """
        # A literal `[]` default is created once at definition time and would
        # be shared by every node constructed without arguments; build a
        # fresh list per instance instead.
        self.inbound_nodes = inbound_nodes if inbound_nodes is not None else []
        self.outbound_nodes = []
        self.value = None
        self.gradients = {}
        # Wire the reverse edges so each input knows who consumes it.
        for n in self.inbound_nodes:
            n.outbound_nodes.append(self)

    def forward(self):
        """Compute ``self.value`` from the inbound nodes (subclass hook)."""
        raise NotImplementedError

    def backward(self):
        """Compute ``self.gradients`` from the outbound nodes (subclass hook)."""
        raise NotImplementedError

In [66]:
class Input(Node):
    """Leaf node: it has no inbound nodes and its value is assigned externally."""

    def __init__(self):
        Node.__init__(self)

    def forward(self):
        """Nothing to compute; ``value`` is set from outside the node."""
        pass

    def backward(self):
        """Sum the gradients every consumer reports for this node."""
        self.gradients = {self: 0}
        for consumer in self.outbound_nodes:
            upstream = consumer.gradients[self]
            # `* 1` yields a new object, so the in-place accumulation below
            # never writes into the consumer's own gradient array.
            self.gradients[self] += upstream * 1

In [67]:
class Add(Node):
    """Node whose value is the sum of all of its inbound nodes' values."""

    def __init__(self, *inputs):
        Node.__init__(self, inputs)
        self.value = 0

    def forward(self):
        """Recompute the sum from scratch.

        The running total starts at 0 on every call; accumulating directly
        into ``self.value`` would add the inputs on top of the previous
        result each time the graph is evaluated again.
        """
        total = 0
        for x in self.inbound_nodes:
            total = total + x.value
        self.value = total

In [68]:
class Mul(Node):
    """Node whose value is the product of all of its inbound nodes' values."""

    def __init__(self, *inputs):
        Node.__init__(self, inputs)
        self.value = 1

    def forward(self):
        """Recompute the product from scratch.

        The running product starts at 1 on every call; multiplying directly
        into ``self.value`` would compound the previous result each time the
        graph is evaluated again.
        """
        product = 1
        for x in self.inbound_nodes:
            product = product * x.value
        self.value = product

In [69]:
class Linear(Node):
    """Affine transform node: ``value = np.dot(X, W) + b``."""

    def __init__(self, inputs, weights, bias):
        # Inbound order is fixed: [inputs, weights, bias].
        Node.__init__(self, [inputs, weights, bias])

    def forward(self):
        """Compute the affine transform of the three inbound values."""
        x_node, w_node, b_node = self.inbound_nodes
        self.value = np.dot(x_node.value, w_node.value) + b_node.value

    def backward(self):
        """Accumulate gradients w.r.t. inputs, weights and bias.

        Each consumer contributes the gradient it stored for this node.
        """
        x_node, w_node, b_node = self.inbound_nodes
        self.gradients = {
            node: np.zeros_like(node.value) for node in self.inbound_nodes
        }
        for consumer in self.outbound_nodes:
            grad_cost = consumer.gradients[self]
            # Inputs: upstream gradient times the transposed weights.
            self.gradients[x_node] += np.dot(grad_cost, w_node.value.T)
            # Weights: transposed inputs times the upstream gradient.
            self.gradients[w_node] += np.dot(x_node.value.T, grad_cost)
            # Bias: upstream gradient summed over axis 0.
            self.gradients[b_node] += np.sum(grad_cost, axis=0, keepdims=False)
            

In [93]:
class Sigmoid(Node):
    """Applies the logistic function element-wise to its single inbound node."""

    def __init__(self, node):
        Node.__init__(self, [node])

    def sigmoid(self, x):
        """Return ``1 / (1 + exp(-x))``."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator

    def forward(self):
        """Squash the inbound node's current value.

        The inbound value is only read here, never recomputed, so the
        inbound node's own ``forward`` must already have run.
        """
        source = self.inbound_nodes[0]
        self.value = self.sigmoid(source.value)

    def backward(self):
        """Accumulate the gradient w.r.t. the inbound node.

        Uses the sigmoid derivative ``value * (1 - value)``, evaluated from
        the output stored by the last forward pass.
        """
        source = self.inbound_nodes[0]
        self.gradients = {
            node: np.zeros_like(node.value) for node in self.inbound_nodes
        }
        for consumer in self.outbound_nodes:
            grad_cost = consumer.gradients[self]
            sigmoid_prime = self.value * (1 - self.value)
            self.gradients[source] += grad_cost * sigmoid_prime
            

In [71]:
class MSE(Node):
    """Mean squared error between targets ``y`` and predictions ``a``."""

    def __init__(self, y, a):
        # Inbound order is fixed: [y, a].
        Node.__init__(self, [y, a])

    def forward(self):
        """Store the mean of the squared differences in ``self.value``.

        Both inputs are reshaped to column vectors before subtracting so the
        difference is taken element by element rather than broadcast.
        """
        target_node, prediction_node = self.inbound_nodes
        targets = target_node.value.reshape(-1, 1)
        predictions = prediction_node.value.reshape(-1, 1)

        # Kept on the instance because backward() reuses both.
        self.m = target_node.value.shape[0]
        self.diff = targets - predictions
        self.value = np.mean(self.diff ** 2)

    def backward(self):
        """Store the gradient of the cost w.r.t. ``y`` and ``a``.

        Relies on ``self.m`` and ``self.diff`` from the last forward pass.
        """
        target_node, prediction_node = self.inbound_nodes
        self.gradients[target_node] = (2 / self.m) * self.diff
        self.gradients[prediction_node] = (-2 / self.m) * self.diff

What is the goal of the `topogical_sort` function below?  
How does this method achieve that goal?

In [72]:
def topogical_sort(feed_dict):
    """Return the nodes reachable from ``feed_dict`` in topological order.

    Uses Kahn's algorithm: start from the fed nodes, emit a node, delete its
    outgoing edges, and schedule any consumer left with no incoming edges.
    As a side effect, every ``Input`` node that is emitted has its ``value``
    set from ``feed_dict``.

    Args:
        feed_dict: dict mapping nodes to the value to feed them.

    Returns:
        A list of nodes in which every node appears after all of the nodes
        it was reached from.
    """
    input_nodes = list(feed_dict.keys())

    # Pass 1: discover every reachable node and record its edges.
    G = {}
    pending = list(input_nodes)
    while pending:
        # Visiting order does not matter here, so pop from the end (O(1)).
        n = pending.pop()
        if n not in G:
            G[n] = {'in': set(), 'out': set()}
        for m in n.outbound_nodes:
            if m not in G:
                G[m] = {'in': set(), 'out': set()}
                # Enqueue a node only on first discovery; re-enqueueing it
                # for every edge repeats work on graphs with shared nodes.
                pending.append(m)
            G[n]['out'].add(m)
            G[m]['in'].add(n)

    # Pass 2: repeatedly emit nodes whose incoming edges are all resolved.
    L = []
    S = set(input_nodes)
    while S:
        n = S.pop()

        if isinstance(n, Input):
            n.value = feed_dict[n]

        L.append(n)
        for m in n.outbound_nodes:
            # A node wired twice into the same consumer lists that consumer
            # twice; the edge sets hold it once, so skip the repeat instead
            # of raising KeyError on a second removal.
            if m not in G[n]['out']:
                continue
            G[n]['out'].remove(m)
            G[m]['in'].remove(n)
            # if no other incoming edges add to S
            if not G[m]['in']:
                S.add(m)
    return L

In [73]:
def forward_pass(output_node, sorted_nodes):
    """Run ``forward`` on each node in order and return the output value.

    Args:
        output_node: node whose ``value`` is returned after the pass.
        sorted_nodes: nodes in the order they should be evaluated.

    Returns:
        ``output_node.value`` after every node has been evaluated.
    """
    for node in sorted_nodes:
        node.forward()
        # Trace the evaluation order by printing each node's class.
        print(type(node))
    return output_node.value

the core operation: backpropagation

In [101]:
def BP(graph):
    """Run one forward pass followed by one backward pass over ``graph``.

    Args:
        graph: list of nodes in evaluation order. ``forward`` is called on
            each node front to back, then ``backward`` back to front.
    """
    # Forward: compute every node's value in order.
    for node in graph:
        node.forward()

    # Backward: walk the same list in reverse order.
    for node in reversed(graph):
        node.backward()

In [143]:
def SGD(trainables, learning_rate=1e-2):
    """Apply one gradient-descent step to every trainable node.

    Args:
        trainables: nodes to update; each must hold its own gradient under
            the key of itself in ``node.gradients``.
        learning_rate: step size multiplied into the gradient.
    """
    for node in trainables:
        # A node's gradient w.r.t. the cost is stored under itself as key.
        step = learning_rate * node.gradients[node]
        # Rebind rather than update in place, so the previous value object
        # is left untouched.
        node.value = node.value - step

# neural network

## add

In [75]:
# Sum three scalar inputs through a single Add node.
x, y, z = Input(), Input(), Input()
f = Add(x, y, z)

feed_dict = {x: 4, y: 5, z: 10}
graph = topogical_sort(feed_dict)
output_add = forward_pass(f, graph)

message = "{} + {} + {} = {} (according to miniflow)"
print(message.format(feed_dict[x], feed_dict[y], feed_dict[z], output_add))

<class '__main__.Input'>
<class '__main__.Input'>
<class '__main__.Input'>
<class '__main__.Add'>
4 + 5 + 10 = 19 (according to miniflow)


## multiply

In [76]:

# Multiply three scalar inputs through a single Mul node.
x, y, z = Input(), Input(), Input()
h = Mul(x, y, z)

feed_dict = {x: 4, y: 5, z: 10}
graph = topogical_sort(feed_dict)
output_mul = forward_pass(h, graph)

message = "{} * {} * {} = {} (according to miniflow)"
print(message.format(feed_dict[x], feed_dict[y], feed_dict[z], output_mul))

<class '__main__.Input'>
<class '__main__.Input'>
<class '__main__.Input'>
<class '__main__.Mul'>
4 * 5 * 10 = 200 (according to miniflow)


## linear

In [77]:
# Evaluate one Linear node on plain-list inputs and a scalar bias.
inputs, weights, bias = Input(), Input(), Input()
f = Linear(inputs, weights, bias)

feed_dict = {
    inputs: [6, 14, 3],
    weights: [0.5, 0.25, 1.4],
    bias: 2,
}
graph = topogical_sort(feed_dict)
output = forward_pass(f, graph)
print(output)

<class '__main__.Input'>
<class '__main__.Input'>
<class '__main__.Input'>
<class '__main__.Linear'>
12.7


## sigmoid

In [78]:
# Chain Linear -> Sigmoid and evaluate it on a 2x2 batch.
X, W, b = Input(), Input(), Input()
f = Linear(X, W, b)
g = Sigmoid(f)

X_value = np.array([[-1., -2.], [-1., -2.]])
W_value = np.array([[2., -3.], [2., -3.]])
b_value = np.array([-3., -5.])

feed_dict = {
    X: X_value,
    W: W_value,
    b: b_value,
}
graph = topogical_sort(feed_dict)
output = forward_pass(g, graph)
print(output)

<class '__main__.Input'>
<class '__main__.Input'>
<class '__main__.Input'>
<class '__main__.Linear'>
<class '__main__.Sigmoid'>
[[  1.23394576e-04   9.82013790e-01]
 [  1.23394576e-04   9.82013790e-01]]


In [79]:
# Feed fixed targets and predictions straight into an MSE node.
y, a = Input(), Input()
cost = MSE(y, a)

y_value = np.array([1, 2, 3])
a_value = np.array([4.5, 5, 10])
feed_dict = {y: y_value, a: a_value}

graph = topogical_sort(feed_dict)
# Evaluate the nodes directly so nothing but the cost is printed.
for n in graph:
    n.forward()

print(cost.value)

23.4166666667


# backpropagation

In [96]:
# Build Linear -> Sigmoid -> MSE and print the gradient of every input.
X, W, b = Input(), Input(), Input()
y = Input()
f = Linear(X, W, b)
a = Sigmoid(f)
cost = MSE(y, a)

X_value = np.array([[-1., -2.], [-1., -2.]])
W_value = np.array([[2.], [3.]])
b_value = np.array([-3.])
y_value = np.array([1, 2])

feed_dict = {X: X_value, y: y_value, W: W_value, b: b_value}

graph = topogical_sort(feed_dict)
BP(graph)

# Each Input stores its own gradient under itself as the key.
gradients = [t.gradients[t] for t in [X, y, W, b]]
for gradient in gradients:
    print(gradient)

[[ -3.34017280e-05  -5.01025919e-05]
 [ -6.68040138e-05  -1.00206021e-04]]
[[ 0.9999833]
 [ 1.9999833]]
[[  5.01028709e-05]
 [  1.00205742e-04]]
[ -5.01028709e-05]


# stochastic gradient descent

In [148]:
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample

# Load data
# NOTE(review): load_boston was removed in scikit-learn 1.2; running this
# cell needs an older scikit-learn or a replacement dataset.
data = load_boston()
X_ = data['data']
y_ = data['target']

# Normalize data
X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

# Seed NumPy's global generator so the weight initialisation and the batch
# sampling below (resample falls back to that generator when no
# random_state is given) are reproducible from a fresh kernel.
np.random.seed(42)

n_features = X_.shape[1]
n_hidden = 10
W1_ = np.random.randn(n_features, n_hidden)
b1_ = np.zeros(n_hidden)
W2_ = np.random.randn(n_hidden, 1)
b2_ = np.zeros(1)

# Neural network
X, y = Input(), Input()
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = MSE(y, l2)

feed_dict = {X: X_, y: y_, W1: W1_, b1: b1_, W2: W2_, b2: b2_}

epochs = 100
# Total number of examples
m = X_.shape[0]
batch_size = 11
steps_per_epoch = m // batch_size 
print(steps_per_epoch)

graph = topogical_sort(feed_dict)
trainables = [W1, b1, W2, b2]

print("Total number of examples = {}".format(m))

# Step 4: repeat steps 1-3 for every batch of every epoch
for i in range(epochs):
    loss = 0
    for j in range(steps_per_epoch):
        # Step 1
        # Randomly sample a batch of examples
        X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

        # Reset value of X and y Inputs
        X.value = X_batch
        y.value = y_batch

        # Step 2: forward and backward pass over the whole graph
        BP(graph)

        # Step 3: gradient-descent update of the weights and biases
        SGD(trainables)

        # Read the loss from the cost node itself rather than relying on it
        # being the last element of the sorted graph.
        loss += cost.value

    print("Epoch: {}, Loss: {:.3f}".format(i+1, loss/steps_per_epoch))


46
Total number of examples = 506
Epoch: 1, Loss: 120.841
Epoch: 2, Loss: 41.989
Epoch: 3, Loss: 27.974
Epoch: 4, Loss: 26.604
Epoch: 5, Loss: 22.233
Epoch: 6, Loss: 18.841
Epoch: 7, Loss: 17.920
Epoch: 8, Loss: 18.798
Epoch: 9, Loss: 18.804
Epoch: 10, Loss: 15.897
Epoch: 11, Loss: 17.410
Epoch: 12, Loss: 15.659
Epoch: 13, Loss: 14.631
Epoch: 14, Loss: 16.686
Epoch: 15, Loss: 16.607
Epoch: 16, Loss: 16.552
Epoch: 17, Loss: 12.978
Epoch: 18, Loss: 10.758
Epoch: 19, Loss: 10.784
Epoch: 20, Loss: 13.849
Epoch: 21, Loss: 8.970
Epoch: 22, Loss: 12.179
Epoch: 23, Loss: 10.602
Epoch: 24, Loss: 8.556
Epoch: 25, Loss: 13.707
Epoch: 26, Loss: 11.858
Epoch: 27, Loss: 12.923
Epoch: 28, Loss: 9.465
Epoch: 29, Loss: 10.165
Epoch: 30, Loss: 11.731
Epoch: 31, Loss: 9.839
Epoch: 32, Loss: 11.684
Epoch: 33, Loss: 9.831
Epoch: 34, Loss: 11.676
Epoch: 35, Loss: 10.419
Epoch: 36, Loss: 9.862
Epoch: 37, Loss: 10.067
Epoch: 38, Loss: 12.902
Epoch: 39, Loss: 8.596
Epoch: 40, Loss: 10.881
Epoch: 41, Loss: 9.35