무식한 신경망 학습
========
MNIST 이미지 인식 신경망을 역전파 없이 만들어보자. 아래와 같은 구조로 만들 것임.

> 입력층(784) &rarr; 은닉층(50 or 100) &rarr; 시그모이드 &rarr; 결과(10) &rarr; 소프트맥스

In [1]:
import numpy as np
import signal
import sys
import mnist

#
# Hyper parameters
#
# Number of neurons in the hidden layer (e.g. 50, 100).
HIDDEN_LAYER_SIZE: int = 50
# Standard deviation of the normally distributed random initial weights.
WEIGHT_INIT_STD: float = 0.01
# How many times gradient descent is applied.
ITERATION_COUNT: int = 3
# Size of the mini-batch used for each training step.
BATCH_SIZE: int = 100
# Learning rate (step size of each gradient-descent update).
LEARNING_RATE: float = 10
# "Epoch": how often, in iterations, training progress is displayed
# (e.g. 100, 300).
EPOCH: int = 1

#
# Utility functions
#
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-x)))`` using
    ``np.logaddexp(0, -x)`` for the inner logarithm. Unlike the direct
    formula, this does not overflow in ``exp`` for strongly negative ``x``,
    so such inputs map to 0.0 without a floating-point overflow warning.

    Args:
        x: A number or NumPy array.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))
def gradient_sigmoid(x):
    """Return the derivative of the sigmoid at ``x``: ``(1 - s) * s``, where ``s = sigmoid(x)``."""
    activation = sigmoid(x)
    return (1.0 - activation) * activation

def softmax(A):
    """Return the softmax of ``A`` taken along its last axis.

    The maximum along the last axis is subtracted before exponentiating, so
    the largest exponent is always 0. The output has the same shape as ``A``.

    Args:
        A: A NumPy array; a 1-D array is treated as a single row of scores.

    Returns:
        An array of the same shape whose entries along the last axis are the
        exponentials of the shifted scores divided by their sum.
    """
    # keepdims=True keeps the reduced axis so the result broadcasts against A.
    shifted = A - A.max(axis=-1, keepdims=True)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum(axis=-1, keepdims=True)

def cross_entropy_error(expected, actual):
    """Return ``-sum(actual * log(expected + 1e-7))`` over the last axis.

    Args:
        expected: Array passed through the logarithm (the callers in this
            file pass the network's softmax output here).
        actual: Array multiplied with the log values (the callers in this
            file pass the label array here).

    Returns:
        The negated sum along the last axis; one value per leading index.
    """
    # A small offset keeps log() finite when an entry of `expected` is 0.
    tiny = 1E-7
    log_values = np.log(expected + tiny)
    return -np.sum(actual * log_values, axis=-1)
def cross_entropy_error_batch(*args):
    """Return the mean of ``cross_entropy_error(*args)`` over all of its entries."""
    per_sample_error = cross_entropy_error(*args)
    return per_sample_error.mean()

def predict(input):
    """Bind ``input`` and return a function that runs the two-layer network on it.

    Args:
        input: Array that is matrix-multiplied with the first weight matrix.

    Returns:
        A function ``network(w0, b0, w1, b1)`` that returns the list
        ``[a0, z0, a1, z1]``: the hidden pre-activation, its sigmoid, the
        output pre-activation, and its softmax.
    """
    def network(w0, b0, w1, b1):
        hidden_pre = input @ w0 + b0
        hidden_out = sigmoid(hidden_pre)
        output_pre = hidden_out @ w1 + b1
        output_prob = softmax(output_pre)
        return [hidden_pre, hidden_out, output_pre, output_prob]

    return network

def accuracy(expected, actual):
    """Return the fraction of positions where both arrays have the same argmax.

    The argmax is taken along the last axis of each array, and the matches
    are averaged into a value between 0 and 1.
    """
    predicted_index = expected.argmax(axis=-1)
    target_index = actual.argmax(axis=-1)
    return np.mean(predicted_index == target_index)

#
# Main logic
#
# Load the dataset and pick out the training images and labels.
MNIST = mnist.load()
TRAIN_IMG, TRAIN_LABEL = MNIST['train_img'], MNIST['train_label']

# Layer widths: the input and output sizes are the last-axis lengths of the
# image and label arrays; the hidden size comes from the hyper parameter.
layer0_size, layer1_size, layer2_size = (
    TRAIN_IMG.shape[-1],
    HIDDEN_LAYER_SIZE,
    TRAIN_LABEL.shape[-1],
)

# Randomly initialize the parameters, stored in the order [w0, b0, w1, b1].
# Weights are scaled standard-normal draws; biases start at zero.
parameters = []
parameters.append(WEIGHT_INIT_STD * np.random.randn(layer0_size, layer1_size))  # w0
parameters.append(np.zeros(layer1_size))                                        # b0
parameters.append(WEIGHT_INIT_STD * np.random.randn(layer1_size, layer2_size))  # w1
parameters.append(np.zeros(layer2_size))                                        # b1

def _numerical_gradient(loss_function, params, h=1E-4):
    """Estimate the gradient of ``loss_function`` by central differences.

    Each entry of each array in ``params`` is shifted in place by ``+h`` and
    ``-h`` in turn, ``loss_function(*params)`` is evaluated for both, and the
    entry is then restored. This costs two loss evaluations per scalar
    parameter.

    Args:
        loss_function: Callable invoked as ``loss_function(*params)``.
        params: List of NumPy arrays; temporarily modified in place.
        h: Half-width of the finite-difference step.

    Returns:
        A list of arrays, one per entry of ``params`` and of the same shape,
        holding ``(loss(+h) - loss(-h)) / (2*h)`` for every element.
    """
    gradients = []
    for param in params:
        gradient = np.empty(param.shape)
        for j in np.ndindex(param.shape):
            orig = param[j]
            try:
                param[j] = orig + h
                y2 = loss_function(*params)
                param[j] = orig - h
                y1 = loss_function(*params)
            finally:
                # Restore the entry even when the evaluation is interrupted
                # (e.g. Ctrl+C), so no parameter is left shifted by +/-h.
                param[j] = orig
            gradient[j] = (y2 - y1)/(2*h)
        gradients.append(gradient)
    return gradients


print('''학습 시작!

반복횟수\t정확도\tLoss
-------------------------------------------''')

try:
    for iteration in range(ITERATION_COUNT):
        # Sample a batch from the train image/label set
        sample = np.random.choice(TRAIN_IMG.shape[0], BATCH_SIZE)
        BATCH_IMG = TRAIN_IMG[sample]
        BATCH_LABEL = TRAIN_LABEL[sample]

        network = predict(BATCH_IMG)

        # Report accuracy and loss on the current batch every EPOCH iterations
        if iteration % EPOCH == 0:
            expected = network(*parameters)[-1]
            percentage = accuracy(expected, BATCH_LABEL)*100
            loss = cross_entropy_error_batch(expected, BATCH_LABEL)
            print(f'{iteration:8}\t{percentage:.04}%\t{loss}')

        # Loss of the current batch as a function of the network parameters
        def loss_function(*arguments):
            expected = network(*arguments)[-1]
            return cross_entropy_error_batch(expected, BATCH_LABEL)

        # Update parameters in place using the gradient descent method
        gradient = _numerical_gradient(loss_function, parameters)
        for param, param_gradient in zip(parameters, gradient):
            param -= LEARNING_RATE * param_gradient

    # Evaluate the trained parameters on the test set
    expected = predict(MNIST['test_img'])(*parameters)[-1]
    TEST_LABEL = MNIST['test_label']
    percentage = accuracy(expected, TEST_LABEL)*100

    print(f'''
    학습 완료!

    최종 점수
    -------------
    정확도 : {percentage}%
    ''')
except KeyboardInterrupt:
    # End with the SGR reset sequence so the terminal does not stay red.
    print('\n\x1b[31mInterrupted!!!\x1b[0m')

학습 시작!

반복횟수	정확도	Loss
-------------------------------------------
       0	8.0%	2.3048960747448994

[31mInterrupted!!![31m


**느려서 못해먹겠다!**