<h3>두 개의 layer를 갖는 network를 생성하고, 미니배치 학습을 테스트해본다.</h3>

In [1]:
from common.functions import *
from common.gradient import numerical_gradient

In [2]:
class TwoLayerNet:
    """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

    All learnable parameters are stored in ``self.params`` under the keys
    ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.

    NOTE(review): ``np``, ``sigmoid``, ``softmax`` and ``cross_entropy_error``
    are not imported by name in this file; presumably they arrive through
    ``from common.functions import *`` -- confirm.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the weight matrices and bias vectors.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden_size: Number of hidden units (columns of ``W1``, rows of ``W2``).
            output_size: Number of outputs (columns of ``W2``).
            weight_init_std: Factor applied to the standard-normal samples
                that initialise the weights.
        """
        # Weight initialisation: scaled Gaussian noise for the weights,
        # zeros for the biases.
        self.params = {}
        self.params['W1'] = weight_init_std * \
                            np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * \
                            np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)

        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between the prediction for ``x`` and ``t``.

        Args:
            x: Input data.
            t: Target labels.
        """
        y = self.predict(x)

        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` that are classified correctly.

        A row counts as correct when the arg-max of its prediction equals
        the arg-max of the matching row of ``t`` (both taken along axis 1).

        Args:
            x: Input data, one sample per row.
            t: Target labels, one row per sample (e.g. one-hot rows).
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        # The ratio used to be computed and then discarded, so this method
        # always returned None; return it to the caller.
        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return a dict with the numerical gradient of the loss for each parameter.

        Args:
            x: Input data.
            t: Target labels.

        Returns:
            Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        # The argument W is deliberately unused: the loss is always evaluated
        # through self.params. This presumably relies on the imported
        # numerical_gradient perturbing the array it is given in place --
        # confirm against common.gradient.
        loss_W = lambda W: self.loss(x, t)

        grads = {}

        # Inside a method the bare name resolves to the module-level
        # numerical_gradient function, not to this method.
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

In [3]:
class TwoLayerNet:
    """Two-layer perceptron (affine -> sigmoid -> affine -> softmax).

    Parameters are kept in ``self.params`` with keys ``'W1'``, ``'b1'``,
    ``'W2'``, ``'b2'``. Gradients can be obtained numerically
    (``numerical_gradient``) or analytically (``gradient``).
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Build the parameter dict for the given layer sizes.

        Weights are standard-normal samples multiplied by ``weight_init_std``;
        biases start at zero.
        """
        # Dict entries are evaluated top to bottom, so W1 is sampled before W2.
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

    def predict(self, x):
        """Return the softmax output of the network for input ``x``."""
        p = self.params
        hidden = sigmoid(np.dot(x, p['W1']) + p['b1'])
        scores = np.dot(hidden, p['W2']) + p['b2']
        return softmax(scores)

    def loss(self, x, t):
        """Return ``cross_entropy_error`` of the prediction for ``x`` against ``t``.

        x: input data, t: target labels.
        """
        return cross_entropy_error(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the share of rows whose arg-max prediction matches arg-max of ``t``."""
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(t, axis=1)
        hits = np.sum(predicted == expected)
        return hits / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return numerical gradients of the loss for every parameter.

        x: input data, t: target labels. The result is a dict keyed by
        ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        def loss_W(W):
            # W is ignored; the loss is evaluated through self.params.
            return self.loss(x, t)

        # The bare name below is the module-level numerical_gradient function.
        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def gradient(self, x, t):
        """Return gradients for every parameter via one forward and one backward pass.

        x: input data (one sample per row), t: target labels.
        The result is a dict keyed by ``'W2'``, ``'b2'``, ``'W1'``, ``'b1'``.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        n_samples = x.shape[0]

        # Forward pass, keeping the intermediates the backward pass needs.
        hidden_in = np.dot(x, W1) + b1
        hidden_out = sigmoid(hidden_in)
        y = softmax(np.dot(hidden_out, W2) + b2)

        # Backward pass. Dividing by the row count presumably matches a
        # batch-averaged cross-entropy loss -- confirm against
        # cross_entropy_error.
        d_scores = (y - t) / n_samples
        d_hidden_out = np.dot(d_scores, W2.T)
        d_hidden_in = sigmoid_grad(hidden_in) * d_hidden_out

        return {
            'W2': np.dot(hidden_out.T, d_scores),
            'b2': np.sum(d_scores, axis=0),
            'W1': np.dot(x.T, d_hidden_in),
            'b1': np.sum(d_hidden_in, axis=0),
        }


In [4]:
# Network used by the smoke tests below: 784 inputs, 100 hidden units, 10 outputs.
net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

In [5]:
# Print the shape of each parameter array, one per line, in layer order.
print(*(net.params[name].shape for name in ('W1', 'b1', 'W2', 'b2')), sep='\n')

(784, 100)
(100,)
(100, 10)
(10,)


In [6]:
# Prediction smoke test on random input.
x = np.random.rand(100, 784)  # 100 random samples with 784 values each
# NOTE: despite its name, `t` holds the network's predictions here, not labels.
t = net.predict(x=x)

<b>x, t를 입력했을 때, loss function(cross entropy)을 W1, b1, W2, b2에 대해 편미분한 값을 grads에 저장한다.</b>

In [7]:
# Gradient computation test: random inputs first, then random targets
# (the right-hand side is evaluated left to right, so RNG order is x, then t).
x, t = np.random.rand(100, 784), np.random.rand(100, 10)

# Numerical gradients of the loss for W1, b1, W2 and b2.
grads = net.numerical_gradient(x=x, t=t)

<h3> 미니배치 학습 구현</h3>

In [18]:
import numpy as np
from dataset.mnist import load_mnist

In [19]:
# Load the MNIST train/test splits, requesting normalised inputs and
# one-hot labels through the keyword flags.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
# The test inputs used to be bound to the misspelled name `x_teset`; keep it
# as an alias so any cell that still refers to the old name keeps working.
x_teset = x_test

In [25]:
# Presumably collects the loss recorded during training -- the code that
# appends to it is not in this part of the notebook.
train_loss_list = []

# Hyperparameters
iters_num = 10000  # number of iterations
train_size = x_train.shape[0]  # number of training samples (rows of x_train)
batch_size = 100
learning_rate = 0.1
# Alias for the original misspelled name, kept so any cell that still
# refers to `learnging_rate` keeps working.
learnging_rate = learning_rate

In [26]:
# Network for mini-batch training: 784 inputs, 50 hidden units, 10 outputs.
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

In [28]:
# Draw `batch_size` random indices from range(train_size). np.random.choice
# samples with replacement by default, so an index can appear more than once.
batch_mask = np.random.choice(train_size, size=batch_size)

array([ 9976,  8197, 15880, 42031,  2434,   439, 56910, 52138, 35592,
       17289, 27551, 28271, 44753, 52673, 21514, 22105,  3206, 35439,
       38129,  5019, 34101, 47743, 59481, 17195, 28337, 27249, 59940,
        1488, 29592,  7882, 14407, 12122,  1295,  6656, 14718, 30470,
       53341, 19969,  9626, 26694, 43905, 38935, 19701, 50958, 27619,
       27835, 59131,  5277, 15788, 29525, 12307, 22063, 43403, 43594,
       25856, 24533, 12625, 28888, 55438, 31057,  1081, 33421, 34833,
       33635, 45551, 21259, 55171,  9654, 46609, 59705, 57580, 48349,
       38277,  1327, 34780, 23229,  2584, 41759, 32969,  5614, 42083,
       54800, 45111, 50325, 22615, 48580,  1816, 14189, 18730, 31166,
       32210, 17719, 27881, 40939,   411, 21514, 36798,  7927, 57893,
       35496])

In [35]:
# Select the sampled rows from inputs and labels with the same index array,
# so each input row stays paired with its label row.
# With batch_size = 100 and 784 columns in x_train, x_batch is 100 x 784.
x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]