In [1]:
import time
import numpy as np

#### 유틸리티 함수

In [2]:
def _t(x):
    return np.transpose(x)

def _m(A, B):
    return np.matmul(A, B)

#### Sigmoid 구현

In [3]:
class Sigmoid:
    def __init__(self):
        self.last_o = 1
        
    def __call__(self, x):
        self.last_o = 1 / (1.0 + np.exp(-x))
        return self.last_o
    
    def grad(self): # sigmoid(x)(1 - sigmoid(x))
        return self.last_o * (1 - self.last_o)

#### Mean Squared Error 구현

In [4]:
class MeanSquaredError:
    def __init__(self):
        # gradient
        self.dh = 1
        self.last_diff = 1
        
    def __call__(self, h, y): # 1/2 * mean ((h - y)^2)
        self.last_diff = h - y
        return 1 / 2 * np.mean(np.square(self.last_diff))
    
    def grad(self): # h - y
        return self.last_diff

#### Neuron 구현

In [5]:
class Neuron:
    def __init__(self, W, b, a_obj):
        # Model parameters
        self.W = W
        self.b = b
        self.a = a_obj()
        
        # gradient
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        self.dh = np.zeros_like(_t(self.W))
        
        self.last_x = np.zeros((self.W.shape[0]))
        self.last_h = np.zeros((self.W.shape[1]))
        
    def __call__(self, x):
        self.last_x = x
        self.last_h = _m(_t(self.W), x) + self.b
        return self.a(self.last_h)
    
    def grad(self): # dy/dh = W
        return self.W + self.a.grad()
    
    def grad_W(self, dh):
        grad = np.ones_like(self.W)
        grad_a = self.a.grad()
        for j in range(grad.shape[1]): # dy/dw = x
            grad[:, j] = dh[j] * grad_a[j] * self.last_x
        return grad
    
    def grad_b(self, dh): # dy/db = 1
        return dh * self.a.grad() * 1

#### Network 구현

In [6]:
class DNN:
    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation):
        def init_var(i, o):
            return np.random.normal(0.0, 0.01, (i,o)), np.zeros((o,))
        
        self.sequence = list()
        # First hidden layer
        W, b = init_var(num_input, num_neuron)
        self.sequence.append(Neuron(W, b, activation))

        # Hidden layers
        for _ in range(hidden_depth - 1):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Neuron(W, b, activation))

        # Output layer
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Neuron(W, b, activation))
    
    def __call__(self, x):
        for layer in self.sequence:
            x = layer(x)
        return x
    
    def calc_gradient(self, loss_obj):
        loss_obj.dh = loss_obj.grad()
        self.sequence.append(loss_obj)
        
        # back-prop loop
        for i in range(len(self.sequence) - 1, 0, -1):
            l1 = self.sequence[i]
            l0 = self.sequence[i - 1]
            
            l0.dh = _m(l0.grad(), l1.dh)
            l0.dW = l0.grad_W(l1.dh)
            l0.db = l0.grad_b(l1.dh)
            
        self.sequence.remove(loss_obj)

#### 경사하강 학습법

In [7]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    loss = loss_obj(network(x), y) # Forward inference
    network.calc_gradient(loss_obj) # Back-propagation
    for layer in network.sequence:
        layer.W += -alpha * layer.dW
        layer.b += -alpha * layer.db
    return loss

#### Training

In [8]:
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))

dnn = DNN(hidden_depth=5,
          num_neuron=32,
          num_input=10,
          num_output=2,
          activation=Sigmoid)
loss_obj = MeanSquaredError()

t = time.time()
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, loss_obj, 0.01)
    print('Epoch {} : Test loss {}'.format(epoch, loss))
print('{} seconds elapsed'.format(time.time() - t))

Epoch 0 : Test loss 0.13884151052177143
Epoch 1 : Test loss 0.13728067722348533
Epoch 2 : Test loss 0.13574879377736898
Epoch 3 : Test loss 0.13424494001155157
Epoch 4 : Test loss 0.13276828125354206
Epoch 5 : Test loss 0.13131801821256817
Epoch 6 : Test loss 0.1298933929727083
Epoch 7 : Test loss 0.1284936928486217
Epoch 8 : Test loss 0.12711825011646882
Epoch 9 : Test loss 0.1257664395254039
Epoch 10 : Test loss 0.12443767483204082
Epoch 11 : Test loss 0.12313140502886816
Epoch 12 : Test loss 0.12184711061976611
Epoch 13 : Test loss 0.12058430012289051
Epoch 14 : Test loss 0.11934250688499844
Epoch 15 : Test loss 0.11812128623687261
Epoch 16 : Test loss 0.1169202129887712
Epoch 17 : Test loss 0.11573887924799527
Epoch 18 : Test loss 0.1145768925321265
Epoch 19 : Test loss 0.11343387414794867
Epoch 20 : Test loss 0.11230945780550068
Epoch 21 : Test loss 0.11120328843786668
Epoch 22 : Test loss 0.11011502119940206
Epoch 23 : Test loss 0.10904432061763139
Epoch 24 : Test loss 0.10799085