## Back Propagation 구현

In [3]:
import time
import numpy as np

### 1. define function

In [1]:
epsilon = 0.0001

def _t(x):
    return np.transpose(x)

def _m(A,B):
    return np.matmul(A,B)

### 2. Sigmoid 

- 따로 class 만드는 이유 : dynamic learning!

In [2]:
class Sigmoid:
    def __init__(self):
        self.last_o = 1
        
    def __call__(self,x):
        self.last_o = 1/(1.0+np.exp(-x))
        return self.last_o
    
    def grad(self):
        return self.last_o * (1-self.last_o)

### 3. MSE ( Mean Squared Error )

In [4]:
class MSE:
    def __init__(self):
        # gradient
        self.dh=1
        self.last_diff = 1
        
    def __call__(self,h,y) : # 1/2 * mean((h-y)^2)
        self.last_diff = h-y
        return 1/2*np.mean(np.square(h-y))
    
    def grad(self):
        return self.last_diff

### 4. Neuron

In [5]:
class Neuron:
    def __init__(self,W,b,a_obj):
        self.W = W
        self.b = b
        self.a = a_obj()
        
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        self.dh = np.zeros_like(_t(self.W))
        
        self.last_x = np.zeros((self.W.shape[0]))
        self.last_h = np.zeros((self.W.shape[1]))
    
    def __call__(self,x):
        self.last_x = x
        self.last_h = _m(_t(self.W),x) + self.b
        return self.a(self.last_h)
        
    def grad(self): # dy/dh = W 
        return self.W * self.a.grad()
    
    def grad_W(self,dh):
        grad = np.ones_like(self.W)
        grad_a = self.a.grad()
        for j in range(grad.shape[1]): # dy/dw = dy/dh * dh/dw
            grad[:,j] = dh[j] * grad_a[j] * self.last_x
        return grad
    
    def grad_b(self,dh): # dy/db = 1
        return dh*self.a.grad()*1 

### 5. DNN ( Deep Neural Network )

In [6]:
class DNN:
    def __init__(self,hidden_depth, num_neuron, num_input, num_output, activation=Sigmoid):
        def init_var(i,o):
            return np.random.normal(0.0, 0.01, (i,o)), np.zeros((o,)) # initialize W & b
        
        self.sequence = list()
        
        # 1st Hidden Layer
        W,b = init_var(num_input,num_neuron)
        self.sequence.append(Neuron(W,b,activation))
        
        # 2nd~last Hidden Layer
        for _ in range(hidden_depth - 1):
            W,b = init_var(num_neuron,num_neuron)
            self.sequence.append(Neuron(W,b,activation))
        
        # Output Layer
        W,b = init_var(num_neuron,num_output)
        self.sequence.append(Neuron(W,b,activation))
        
    def __call__(self,x):
        for layer in self.sequence:
            x = layer(x)
        return x
    
    def calc_grad(self,loss_obj):
        loss_obj.dh = loss_obj.grad()
        self.sequence.append(loss_obj)
        
        # back-propagation
        for i in range(len(self.sequence)-1,0,-1):
            l1 = self.sequence[i]
            l0 = self.sequence[i-1]
            
            l0.dh = _m(l0.grad(), l1.dh)
            l0.dW = l0.grad_W(l1.dh)
            l0.db = l0.grad_b(l1.dh)
            
        self.sequence.remove(loss_obj)      

### 6. Gradient Descent

In [7]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    loss = loss_obj(network(x),y)
    network.calc_grad(loss_obj)
    for layer in network.sequence:
        layer.W += -alpha*layer.dW
        layer.b += -alpha*layer.db
    return loss

In [12]:
x

array([ 1.83953917,  0.91310881,  0.39742267,  0.04061736,  1.00257472,
       -0.02544987,  1.11252069,  0.15905722,  0.92256944,  0.54025473])

In [13]:
y

array([ 0.73625   , -1.32924432])

In [11]:
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0,1.0, (2,))

t = time.time()
dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=Sigmoid)
loss_obj = MSE()

for epoch in range(10000):
    loss = gradient_descent(dnn,x,y,loss_obj,0.01)
    if epoch%500==0:
        print('Epoch {} : Test Loss {}'.format(epoch, loss))

print('{} seconds elapsed.'.format(time.time()-t))

Epoch 0 : Test Loss 0.8425329724417312
Epoch 500 : Test Loss 0.45237323926306916
Epoch 1000 : Test Loss 0.44661420602273777
Epoch 1500 : Test Loss 0.44485254616474085
Epoch 2000 : Test Loss 0.4440051799358345
Epoch 2500 : Test Loss 0.4435101047315684
Epoch 3000 : Test Loss 0.4431867695329297
Epoch 3500 : Test Loss 0.44295965317498154
Epoch 4000 : Test Loss 0.4427917064254573
Epoch 4500 : Test Loss 0.4426626672127787
Epoch 5000 : Test Loss 0.4425605399759419
Epoch 5500 : Test Loss 0.4424777797050687
Epoch 6000 : Test Loss 0.4424094081086844
Epoch 6500 : Test Loss 0.44235201049856904
Epoch 7000 : Test Loss 0.4423031682457508
Epoch 7500 : Test Loss 0.4422611212877516
Epoch 8000 : Test Loss 0.44222455890632906
Epoch 8500 : Test Loss 0.4421924853560308
Epoch 9000 : Test Loss 0.44216413089933837
Epoch 9500 : Test Loss 0.442138891325052
17.53489875793457 seconds elapsed.


### MUCH FASTER with Back-Prop!