# 신경망 학습

In [1]:
import numpy as np
from dataset.mnist import load_mnist

## Helper functions

In [2]:
# 손실함수 (Cross Entropy)
def cross_entropy_error(y, t):
    """Return the mean cross-entropy between predictions and targets.

    Args:
        y: Predicted values as a NumPy array, either 1-D (a single sample)
            or with one sample per row.
        t: Target array with the same number of elements as ``y``; it is
            multiplied element-wise with ``log(y)`` (e.g. one-hot labels).

    Returns:
        The summed cross-entropy divided by the number of rows in ``y``.
    """
    eps = 1e-7  # keeps log() away from log(0) = -inf

    # Promote a single sample to a batch of one so the code below is uniform.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    n_samples = y.shape[0]
    weighted_log_sum = np.sum(t * np.log(y + eps))
    return -weighted_log_sum / n_samples

In [3]:
# 신경망에서 사용할 W(Matrix 형태)의 편미분 행렬을 구하는 함수
# 신경망의 기울기 : 그레디언트 (편미분 벡터)
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Every element of ``x`` is perturbed in place by ``+h`` and ``-h``, ``f``
    is evaluated for each perturbation, and the element is then restored.

    Args:
        f: Callable taking ``x`` and returning a scalar.
        x: NumPy array of any shape; modified temporarily and restored.

    Returns:
        An array created with ``np.zeros_like(x)`` holding, for each element,
        ``(f(x + h) - f(x - h)) / (2 * h)``.
    """
    step = 1e-4  # 0.0001
    gradient = np.zeros_like(x)

    walker = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"])
    for _ in walker:
        pos = walker.multi_index
        original = x[pos]

        x[pos] = original + step
        loss_plus = f(x)   # f(x + h)

        x[pos] = original - step
        loss_minus = f(x)  # f(x - h)

        # Put the element back before moving on to the next one.
        x[pos] = original
        gradient[pos] = (loss_plus - loss_minus) / (2 * step)

    return gradient

In [4]:
# Softmax
def softmax(x):
    """Apply the softmax function to ``x``.

    Args:
        x: NumPy array. A 2-D array is treated as one sample per row and is
            normalised row by row; any other array is normalised over all of
            its elements together.

    Returns:
        An array with the same shape as ``x`` whose normalised group(s) sum
        to 1.
    """
    if x.ndim == 2:
        # Subtract each row's maximum so exp() cannot overflow.
        shifted = x - np.max(x, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)

    # Subtract the maximum so exp() cannot overflow.
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    return exps / np.sum(exps)

In [5]:
# Sigmoid
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

## 2층 신경망 구현하기

In [6]:
class TwoLayerNet:
    """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

    The learnable parameters are stored in ``self.params`` under the keys
    ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``. The class relies on the
    module-level helpers ``sigmoid``, ``softmax``, ``cross_entropy_error``
    and ``numerical_gradient``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Initialise the parameters.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            weight_init_std: Scale applied to the standard-normal samples
                used for the weight matrices.
        """
        # Weights are small Gaussian noise, biases start at zero.
        # W1: (input_size, hidden_size), b1: (hidden_size,)
        # W2: (hidden_size, output_size), b2: (output_size,)
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``.

        Args:
            x: A single sample of shape ``(input_size,)`` or a batch of shape
                ``(batch, input_size)``.

        Returns:
            Softmax output of shape ``(output_size,)`` or
            ``(batch, output_size)`` respectively.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1   # (..., hidden_size)
        z1 = sigmoid(a1)          # (..., hidden_size)
        a2 = np.dot(z1, W2) + b2  # (..., output_size)
        return softmax(a2)        # (..., output_size)

    def loss(self, x, t):
        """Return the cross-entropy loss of the prediction for ``x`` against ``t``."""
        return cross_entropy_error(self.predict(x), t)

    def numerical_gradient(self, x, t):
        """Return numerically estimated loss gradients for every parameter.

        Args:
            x: Input data passed on to ``loss``.
            t: Targets passed on to ``loss``.

        Returns:
            Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``; each value
            has the same shape as the corresponding parameter.
        """
        # The argument is ignored on purpose: the module-level
        # numerical_gradient perturbs the parameter array in place, and
        # self.loss reads those perturbed values through self.params.
        def loss_of_current_params(_):
            return self.loss(x, t)

        # Inside a method body the bare name resolves to the module-level
        # function, not to this method.
        return {
            key: numerical_gradient(loss_of_current_params, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def accuracy(self, x=None, t=None):
        """Return the fraction of samples in ``x`` that are classified correctly.

        Args:
            x: A single sample or a batch of samples, as accepted by
                ``predict``.
            t: Targets, either with the same number of elements as the
                prediction (e.g. one-hot rows) or as one class index per
                sample.

        Returns:
            The accuracy as a float in ``[0, 1]``, or ``None`` when ``x`` or
            ``t`` is omitted (the behaviour of the former placeholder).
        """
        if x is None or t is None:
            return None

        scores = np.atleast_2d(self.predict(x))
        predicted = np.argmax(scores, axis=1)

        t = np.asarray(t)
        if t.size == scores.size:
            # One row of scores per sample: take the position of the maximum.
            actual = np.argmax(t.reshape(scores.shape), axis=1)
        else:
            # Already class indices.
            actual = t.reshape(-1)

        return float(np.mean(predicted == actual))
    

In [7]:
# Load the MNIST train/test splits through the project-local loader.
# NOTE(review): judging by the keyword names, this requests normalised,
# flattened images and one-hot labels -- confirm against dataset.mnist.
(X_train, y_train), (X_test, y_test) = load_mnist(normalize=True, flatten=True, one_hot_label=True)

In [8]:
# Build the network: 784 inputs, 20 hidden units, 10 outputs.
network = TwoLayerNet(
    input_size=784,
    hidden_size=20,
    output_size=10,
)

In [9]:
# 하이퍼파라미터
# Hyperparameters
iters_num = 1000
batch_size = 100  # mini-batch size
learning_rate = 0.1

# Take the sample count from the data instead of hard-coding it.
train_size = X_train.shape[0]

for i in range(iters_num):
    # Indices of a randomly drawn mini-batch (np.random.choice samples with
    # replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = X_train[batch_mask]
    t_batch = y_train[batch_mask]

    # 1. Gradient of the loss with respect to every parameter.
    grads = network.numerical_gradient(x_batch, t_batch)

    # 2. Gradient descent step: W(new) <- W(old) - lr * gradient.
    for key in ('W1', 'W2', 'b1', 'b2'):
        network.params[key] = network.params[key] - (learning_rate * grads[key])

    # Loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    print(i, loss)

0 2.2881070117432243
1 2.29833773568656
2 2.2995307468748067
3 2.2989320106914
4 2.290952958216265
5 2.2971347074524533
6 2.3031441284648695
7 2.2842155262429946
8 2.297422171566686
9 2.2950297669442574
10 2.2993340739461035
11 2.286568943427824
12 2.2860274915813736
13 2.29118836180709
14 2.2965203223051494
15 2.3122429511053775
16 2.2983584184140304
17 2.29146499095222
18 2.296378431523574
19 2.296480318787845
20 2.294735240486351
21 2.2959126237585696
22 2.279737882681141
23 2.2950106824990746
24 2.305056612289907
25 2.3011702214196834
26 2.2981425803233444
27 2.2985964845081384
28 2.302239662898638
29 2.295276749189689
30 2.2893181286382047
31 2.2923004052862512
32 2.2941532575739765
33 2.290458017437712
34 2.291794373343582
35 2.2838443868072718
36 2.3067230839561006
37 2.2858465521258364
38 2.3001067516888343
39 2.2907311787649274
40 2.3061742684807505
41 2.298593694386522
42 2.2839521009016366
43 2.2874568382972433
44 2.2700826381301638
45 2.300665902195292
46 2.315128006537179


369 1.7489349048165854
370 1.7442531881329437
371 1.6689865917390485
372 1.7795551513047314
373 1.7698361151892046
374 1.772482925214116
375 1.6850335636408824
376 1.6878472672896776
377 1.7267680714665807
378 1.7320796352941403
379 1.6714544977500185
380 1.6298179996210813
381 1.6891905115468546
382 1.7903336806146455
383 1.7790082205530962
384 1.7213479728016288
385 1.6621589661015475
386 1.609518308427409
387 1.7093018232068102
388 1.676433478391299
389 1.6345175669525827
390 1.6119771589902678
391 1.764499224564716
392 1.6557786002084907
393 1.6494034202085088
394 1.613925313341445
395 1.6794454165439143
396 1.7003020716084427
397 1.6313798122159482
398 1.6929521521851023
399 1.6737928407063938
400 1.5736786641336975
401 1.6398666295631403
402 1.692283032742859
403 1.6199881420077529
404 1.6328247082932428
405 1.6458115540924547
406 1.5673463720071297
407 1.6450070127190548
408 1.5884718331898846
409 1.6761867357765403
410 1.6834688260834787
411 1.540975819147835
412 1.609592497805

730 0.8650341707236209
731 0.9652876696893452
732 0.9634246848159859
733 0.9834117081840299
734 0.9376710832741696
735 0.8950827356280092
736 0.887635089108241
737 1.0431027413906266
738 0.9095692805397405
739 0.9121120212852121
740 0.9177130198310095
741 0.9623390171272649
742 0.9688862789678058
743 0.9084243776754272
744 0.9166981232469085
745 0.958574946421302
746 0.9009772834678467
747 0.8606890393864662
748 0.9473992035790024
749 1.0196124508199698
750 0.795311339472924
751 0.9374771517073635
752 0.9532896522359695
753 0.9313171776254037
754 1.007193964921592
755 0.7647481686439696
756 0.8854623872631481
757 0.9534155780511625
758 0.8510805113589072
759 0.8991205475891649
760 0.8561740409162695
761 0.8536215017446499
762 0.912830508726306
763 0.7316040570344746
764 0.8873596741627514
765 0.9171960420950086
766 0.8040032301584656
767 0.8852656668838117
768 0.9294147889516614
769 0.9703926530824227
770 0.8724195214780772
771 0.8992367684706483
772 0.7922209561849672
773 0.8701350485