In [41]:
# Exercise 1
# https://github.com/WegraLee/deep-learning-from-scratch/blob/master/ch05/two_layer_net.py 참고
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
sys.path.append('deep-learning-from-scratch-master')  # load_mnist 모듈을 가져올수 있도록 소스코드 경로 추가
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict

# https://github.com/WegraLee/deep-learning-from-scratch/blob/master/common/functions.py
def softmax(x):
    """Return the softmax of ``x``.

    For a 2-D array every row is normalized independently, so each row of
    the result sums to 1. For any other number of dimensions the
    normalization runs over all elements of ``x`` at once.

    The maximum is subtracted before exponentiating so that large inputs
    do not overflow in ``np.exp``; this does not change the result.
    """
    if x.ndim != 2:
        shifted = x - np.max(x)  # overflow guard
        exps = np.exp(shifted)
        return exps / np.sum(exps)

    # Row-wise case: keepdims lets the per-row max / sum broadcast back
    # against the (rows, cols) array without transposing.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)

# https://github.com/WegraLee/deep-learning-from-scratch/blob/master/common/gradient.py 참고
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is perturbed in place by ``+h`` and ``-h`` and
    ``f(x)`` is evaluated for both; the element is restored afterwards.

    Args:
        f: callable taking ``x`` and returning a scalar. It is called
            with the same array object that was passed in, so callers may
            rely on the in-place perturbation being visible through other
            references to ``x``.
        x: NumPy array to differentiate with respect to. It is modified
            temporarily and restored before returning, including when
            ``f`` raises. NOTE(review): an integer array would truncate
            the ``h`` perturbation on assignment, so a floating-point
            array is expected.

    Returns:
        Array created with ``np.zeros_like(x)`` holding
        ``(f(x+h) - f(x-h)) / (2*h)`` per element.
    """
    h = 1e-4 # 0.0001
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        saved = x[idx]
        # Use the same Python-float base for both steps so the +h and -h
        # perturbations are computed symmetrically.
        center = float(saved)
        try:
            x[idx] = center + h
            f_plus = f(x)   # f(x+h)

            x[idx] = center - h
            f_minus = f(x)  # f(x-h)
        finally:
            # Always put the original value back, even if f raised, so
            # the caller's array is never left perturbed.
            x[idx] = saved

        grad[idx] = (f_plus - f_minus) / (2*h)
        it.iternext()

    return grad


class ThreeLayerNet:
    """Three-Affine-layer classifier: Affine -> Relu -> Affine -> Relu -> Affine.

    The output of the last Affine layer is scored by a ``SoftmaxWithLoss``
    instance held in ``lastLayer``.

    Attributes set by ``__init__``:
        params: dict with keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'.
        layers: OrderedDict of the forward layers in execution order.
        lastLayer: the ``SoftmaxWithLoss`` instance used by ``loss``.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, weight_init_std = 0.01):
        """Create the parameters and the layer stack.

        Weights are drawn from ``np.random.randn`` scaled by
        ``weight_init_std``; biases start at zero.
        """
        # (weight key, bias key, rows, cols) for each Affine layer, in order.
        layout = (
            ('W1', 'b1', input_size, hidden_size1),
            ('W2', 'b2', hidden_size1, hidden_size2),
            ('W3', 'b3', hidden_size2, output_size),
        )
        self.params = {}
        for w_key, b_key, n_in, n_out in layout:
            self.params[w_key] = weight_init_std * np.random.randn(n_in, n_out)
            self.params[b_key] = np.zeros(n_out)

        # Each Affine layer is handed the very same array objects stored in
        # params. NOTE(review): presumably Affine keeps references to them,
        # so in-place updates of params are seen by the layers -- confirm
        # in common.layers.
        self.layers = OrderedDict([
            ('Affine1', Affine(self.params['W1'], self.params['b1'])),
            ('Relu1', Relu()),
            ('Affine2', Affine(self.params['W2'], self.params['b2'])),
            ('Relu2', Relu()),
            ('Affine3', Affine(self.params['W3'], self.params['b3'])),
        ])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in ``layers`` and return the result."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return ``lastLayer.forward`` applied to the prediction for ``x`` and labels ``t``.

        x: input data, t: ground-truth labels.
        """
        scores = self.predict(x)
        return self.lastLayer.forward(scores, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose argmax prediction matches ``t``.

        ``t`` may be 1-D class indices or, if it has more dimensions,
        per-row scores that are reduced with argmax along axis 1.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)

        hits = np.sum(predicted == labels)
        return hits / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return gradients of the loss for every parameter via finite differences.

        x: input data, t: ground-truth labels.

        Uses the module-level ``numerical_gradient`` function. The loss
        callable ignores its argument: the parameter arrays are perturbed
        in place by that function, and the loss is simply re-evaluated.
        """
        def loss_W(W):
            return self.loss(x, t)

        names = ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
        return {name: numerical_gradient(loss_W, self.params[name]) for name in names}

    def gradient(self, x, t):
        """Return gradients of the loss for every parameter via backpropagation.

        Runs a forward pass through ``loss``, then calls ``backward`` on
        ``lastLayer`` and on each layer in reverse order, and finally
        collects ``dW`` / ``db`` from the three Affine layers.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # collect results
        grads = {}
        for w_key, b_key, layer_key in (
            ('W1', 'b1', 'Affine1'),
            ('W2', 'b2', 'Affine2'),
            ('W3', 'b3', 'Affine3'),
        ):
            affine = self.layers[layer_key]
            grads[w_key], grads[b_key] = affine.dW, affine.db

        return grads

In [42]:
import numpy as np
from dataset.mnist import load_mnist

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = ThreeLayerNet(input_size=784, hidden_size1=100, hidden_size2=50, output_size=10)

# Training hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Per-iteration loss and per-epoch accuracies
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations per epoch. Integer division keeps this an int so the
# `i % iter_per_epoch == 0` check below fires once per epoch even when
# train_size is not an exact multiple of batch_size (true division would give
# a fractional value that the integer counter almost never divides evenly).
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a random mini-batch of indices
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    #grad = network.numerical_gradient(x_batch, t_batch) # numerical differentiation
    grad = network.gradient(x_batch, t_batch) # backpropagation (much faster)

    # Update the parameters in place
    for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on this batch after the update
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch: evaluate on the full train and test sets
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)
print('end')

0.09751666666666667 0.0974
0.7724833333333333 0.7791
0.8977833333333334 0.8991
0.9278333333333333 0.9244
0.9499833333333333 0.9471
0.9609 0.9555
0.96665 0.9594
0.9737666666666667 0.9667
0.9741666666666666 0.9672
0.9795333333333334 0.9696
0.9807 0.97
0.9820333333333333 0.9702
0.98395 0.9705
0.9871666666666666 0.9728
0.9866 0.9742
0.98865 0.9738
0.9908166666666667 0.9742
end


In [39]:
# https://github.com/WegraLee/deep-learning-from-scratch/blob/master/ch05/two_layer_net.py 참고
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
sys.path.append('deep-learning-from-scratch-master')  # load_mnist 모듈을 가져올수 있도록 소스코드 경로 추가
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict

# https://github.com/WegraLee/deep-learning-from-scratch/blob/master/common/functions.py
def softmax(x):
    """Compute softmax probabilities for ``x``.

    A 2-D input is treated row by row (each output row sums to 1); any
    other input is normalized over all of its elements together. The
    maximum is subtracted first as a guard against overflow in ``np.exp``.
    """
    if x.ndim == 2:
        # Work column-wise on the transpose, then transpose back.
        cols = x.T
        cols = cols - np.max(cols, axis=0)
        exp_cols = np.exp(cols)
        probs = exp_cols / np.sum(exp_cols, axis=0)
        return probs.T

    shifted = x - np.max(x)  # overflow guard
    exp_vals = np.exp(shifted)
    return exp_vals / np.sum(exp_vals)

# https://github.com/WegraLee/deep-learning-from-scratch/blob/master/common/gradient.py 참고
def numerical_gradient(f, x):
    """Approximate the gradient of ``f`` at ``x`` with central differences.

    Every element of ``x`` is shifted in place by ``+h`` and then ``-h``,
    ``f(x)`` is evaluated for each, and the element is put back.

    Args:
        f: callable that receives ``x`` (the same array object passed in)
            and returns a scalar.
        x: NumPy array of values to differentiate with respect to. It is
            perturbed temporarily and is restored before this function
            exits, whether it returns normally or ``f`` raises.
            NOTE(review): integer arrays would truncate the perturbation
            on assignment; a floating-point array is expected.

    Returns:
        An array from ``np.zeros_like(x)`` filled with
        ``(f(x+h) - f(x-h)) / (2*h)`` for each element.
    """
    h = 1e-4 # 0.0001
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        base = float(tmp_val)  # same base for both steps keeps them symmetric
        try:
            x[idx] = base + h
            fxh1 = f(x) # f(x+h)

            x[idx] = base - h
            fxh2 = f(x) # f(x-h)
        finally:
            # Restore the value even when f raises, so x is never left
            # in a perturbed state.
            x[idx] = tmp_val

        grad[idx] = (fxh1 - fxh2) / (2*h)
        it.iternext()

    return grad


class TwoLayerNet:
    """Two-Affine-layer classifier: Affine -> Relu -> Affine.

    The output of the last Affine layer is scored by a ``SoftmaxWithLoss``
    instance held in ``lastLayer``.

    Attributes set by ``__init__``:
        params: dict with keys 'W1', 'b1', 'W2', 'b2'.
        layers: OrderedDict of the forward layers in execution order.
        lastLayer: the ``SoftmaxWithLoss`` instance used by ``loss``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        """Create the parameters and the layer stack.

        Weights come from ``np.random.randn`` scaled by ``weight_init_std``;
        biases are initialized to zero.
        """
        # Initialize weights
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)
        self.params = {'W1': w1, 'b1': b1, 'W2': w2, 'b2': b2}

        # Build layers; each Affine receives the same array objects that
        # are stored in params.
        self.layers = OrderedDict([
            ('Affine1', Affine(self.params['W1'], self.params['b1'])),
            ('Relu1', Relu()),
            ('Affine2', Affine(self.params['W2'], self.params['b2'])),
        ])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Feed ``x`` forward through all layers in order and return the output."""
        activation = x
        for layer in self.layers.values():
            activation = layer.forward(activation)
        return activation

    def loss(self, x, t):
        """Return ``lastLayer.forward`` of the prediction for ``x`` against labels ``t``.

        x: input data, t: ground-truth labels.
        """
        return self.lastLayer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose argmax prediction equals the label.

        If ``t`` is not 1-D it is reduced with argmax along axis 1 first.
        """
        guesses = np.argmax(self.predict(x), axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        n_correct = np.sum(guesses == t)
        return n_correct / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return loss gradients for all parameters using finite differences.

        x: input data, t: ground-truth labels.

        Delegates to the module-level ``numerical_gradient``. The loss
        callable ignores its argument; the parameter array is perturbed in
        place by that function and the loss is re-evaluated.
        """
        def loss_W(W):
            return self.loss(x, t)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])
        return grads

    def gradient(self, x, t):
        """Return loss gradients for all parameters using backpropagation.

        Performs a forward pass via ``loss``, then calls ``backward`` on
        ``lastLayer`` and on each layer in reverse order, and reads ``dW`` /
        ``db`` from the two Affine layers.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # collect results
        first, second = self.layers['Affine1'], self.layers['Affine2']
        return {
            'W1': first.dW, 'b1': first.db,
            'W2': second.dW, 'b2': second.db,
        }

In [40]:
# coding: utf-8
#import sys, os
#sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
#from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Training hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Per-iteration loss and per-epoch accuracies
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations per epoch. Floor division keeps this an int, so the
# `i % iter_per_epoch == 0` check below triggers once per epoch even if
# train_size is not an exact multiple of batch_size (with true division the
# value would be fractional and the check would rarely be satisfied).
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a random mini-batch of indices
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient
    #grad = network.numerical_gradient(x_batch, t_batch) # numerical differentiation
    grad = network.gradient(x_batch, t_batch) # backpropagation (much faster)

    # Update the parameters in place
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on this batch after the update
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch: evaluate on the full train and test sets
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

0.13075 0.1253
0.9023 0.9054
0.9244166666666667 0.9273
0.9348333333333333 0.9359
0.9426333333333333 0.9428
0.9514166666666667 0.9492
0.9555 0.9531
0.9613666666666667 0.9586
0.9648166666666667 0.9601
0.9667 0.961
0.9703166666666667 0.964
0.9722333333333333 0.9649
0.97345 0.9666
0.9761166666666666 0.9677
0.9769833333333333 0.9695
0.978 0.9678
0.97975 0.97
