## 수치 미분을 이용한 심층 신경망 학습

In [2]:
import time
import numpy as np

## 유틸리티 함수

In [3]:
def _t(x):
    return np.transpose(x)

def _m(A, B):
    return np.matmul(A, B)

## Sigmoid 구현
#### sigmoid를 객체로 만든 이유 : DP를 사용하기 위함 (많은 중복 연산을 피하기 위해)

In [4]:
class Sigmoid:
    def __init__(self): # sigmoid(x)를 저장하기 위함
        self.last_o = 1

    def __call__(self, x):
        self.last_o = 1 / (1.0+ np.exp(-x))
        return self.last_o

    # dh(n) / dh(n-1)
    def grad(self): #sigmoid(x) / (1-sigmoid(x))
        return self.last_o / (1- self.last_o)

## Mean Squared Error 구현

In [5]:
class MeanSquaredError:
    def __init__(self):
        #gradient : chain rule을 위한
        self.dh = 1
        # backpropagation을 위한
        self.last_diff = 1

    def __call__(self, h, y): # 1/2 mean((h-y)^2)
        self.last_diff = h-y
        return 1/2 * np.mean(np.square(h-y))

    # dh(n) / dh(n-1)
    def grad(self): # (h-y)
        return self.last_diff


## 뉴런 구현

In [34]:
class Neuron:
    def __init__(self, W, b, a_obj):
        self.W = W
        self.b = b
        self.a = a_obj()
        
        #graident
        self.dW = np.zeros_like(W)
        self.db = np.zeros_like(b)
        self.dh = np.zeros_like(_t(W)) # h(n)j / h(n-1)i    j,i=0 ~ layer
        
        # 입력 변수 저장
        self.last_x = np.zeros((W.shape[0])) 
        # 출력 변수 저장
        self.last_h = np.zeros((W.shape[1]))

    def __call__(self, x):
        self.last_x = x
        self.last_h = _m(_t(self.W),x)+self.b
        return self.a(self.last_h)

    # dh(n) / dh(n-1)
    def grad(self): #dy/dh = W * da/dh
        return self.a.grad()*self.W

    # parameter dh : 여태껏 넘어온 gradient
    def grad_W(self, dh):
        grad = np.ones_like(self.W)
        grad_a = self.a.grad()
        for j in range(grad.shape[1]):
            grad[:, j] = dh[j] * grad_a[j] * self.last_x
        return grad
    
    def grad_b(self, dh): # y = Wx + b  -> dy /dh = 1
        return dh * self.a.grad()        

## 심층신경망 구현

In [35]:
class DNN:
    def __init__(self, hidden_depth, num_neuron, input, output, activation=Sigmoid):
        def init_var(i, o):
            return np.random.normal(0.0, 0.01, (i, o)), np.zeros((o,))

        self.sequence = list()
        # First hidden layer
        W, b = init_var(input, num_neuron)
        self.sequence.append(Neuron(W, b, activation))

        # Hidden Layers
        for index in range(hidden_depth):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Neuron(W, b, activation))

        # Output Layer
        W, b = init_var(num_neuron, output)
        self.sequence.append(Neuron(W, b, activation))

    def __call__(self, x):
        for layer in self.sequence:
            x = layer(x)
        return x

    def calc_gradient(self, loss_obj):
        loss_obj.dh = loss_obj.grad()
        self.sequence.append(loss_obj)
        
        #back propagation loop
        for i in range(len(self.sequence) - 1,0,-1):
            l1 = self.sequence[i]
            l0 = self.sequence[i-1]
            
            l0.dh = _m(l0.grad(),l1.dh)
            l0.dW = l0.grad_W(l1.dh)
            l0.db = l0.grad_b(l1.dh)
        
        self.sequence.remove(loss_obj)

## 경사하강 학습법

In [36]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    loss = loss_obj(network(x), y)  # Forward inference
    network.calc_gradient(loss_obj)  # Back-propagation
    for layer in network.sequence:
        layer.W += -alpha * layer.dW
        layer.b += -alpha * layer.db
    return loss

## 동작 테스트

In [37]:
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))

t = time.time()
dnn = DNN(hidden_depth=5, num_neuron=32, input=10, output=2, activation=Sigmoid)
loss_obj = MeanSquaredError()
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, loss_obj, alpha=0.01)
    print('Epoch {}: Test loss {}'.format(epoch, loss))
print('{} seconds elapsed.'.format(time.time() - t))

Epoch 0: Test loss 0.9137901280884936
Epoch 1: Test loss 0.8738415675977791
Epoch 2: Test loss 0.8417730882285664
Epoch 3: Test loss 0.8152304284573466
Epoch 4: Test loss 0.792771026810941
Epoch 5: Test loss 0.7734432598345216
Epoch 6: Test loss 0.756585775672981
Epoch 7: Test loss 0.7417205755661107
Epoch 8: Test loss 0.728491377958226
Epoch 9: Test loss 0.716625914175404
Epoch 10: Test loss 0.7059117629397418
Epoch 11: Test loss 0.6961802661923765
Epoch 12: Test loss 0.6872954831581489
Epoch 13: Test loss 0.6791464010257032
Epoch 14: Test loss 0.6716413159475775
Epoch 15: Test loss 0.6647036988610066
Epoch 16: Test loss 0.6582691005802521
Epoch 17: Test loss 0.6522827990301241
Epoch 18: Test loss 0.6466979859400355
Epoch 19: Test loss 0.6414743519453167
Epoch 20: Test loss 0.6365769701564511
Epoch 21: Test loss 0.6319754062357377
Epoch 22: Test loss 0.6276430024046187
Epoch 23: Test loss 0.6235562964532844
Epoch 24: Test loss 0.6196945465770277
Epoch 25: Test loss 0.6160393399277158


### DP를 사용한 BackPropagation 방법은 매우 빠르다.