In [7]:
class TwoLayerNet:
    """Two-layer network built from layer objects: Affine1 -> Relu1 -> Affine2.

    The loss is produced by a separate final layer (``SoftmaxWithLoss``) that
    is applied to the output of the stacked layers.

    Attributes:
        params: dict holding the arrays 'W1', 'b1', 'W2' and 'b2'.
        layers: ordered mapping of layer name -> layer object, in forward order.
        lastLayer: the final layer of the network, used to compute the loss.
    """

    # Example sizes: 784, 50, 10
    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        """Create the parameter arrays and wire up the layers.

        Args:
            input_size: number of rows of W1.
            hidden_size: number of columns of W1 / rows of W2; length of b1.
            output_size: number of columns of W2; length of b2.
            weight_init_std: factor applied to the standard-normal samples
                used as initial weights.
        """
        # Weights are scaled standard-normal samples, biases start at zero.
        # W1 is drawn before W2, so the random stream is consumed in that order.
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)
        self.params = {'W1': w1, 'b1': b1, 'W2': w2, 'b2': b2}

        # Layers are kept in an ordered mapping because forward/backward
        # passes walk them in (reverse) insertion order.
        self.layers = OrderedDict([
            ('Affine1', Affine(self.params['W1'], self.params['b1'])),
            ('Relu1', Relu()),
            ('Affine2', Affine(self.params['W2'], self.params['b2'])),
        ])

        # Final layer of the network; it turns the scores into a loss.
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` forward through every layer in order and return the result."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return the loss of the final layer for input data ``x`` and labels ``t``."""
        return self.lastLayer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose predicted class matches ``t``.

        ``t`` may be a 1-D array of class indices; any other rank is reduced
        with ``argmax`` along axis 1 first.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)
        hits = np.sum(predicted == labels)
        return hits / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Compute the gradients of every parameter by numerical differentiation.

        Delegates to the module-level ``numerical_gradient`` function once per
        parameter array and returns a dict keyed 'W1', 'b1', 'W2', 'b2'.
        """
        def loss_W(W):
            # The argument is ignored; the loss is always evaluated on (x, t).
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def gradient(self, x, t):
        """Compute the gradients of every parameter by backpropagation.

        Returns a dict keyed 'W1', 'b1', 'W2', 'b2' read from the ``dW`` / ``db``
        attributes of the two Affine layers after the backward pass.
        """
        # Forward pass (the returned loss value itself is not needed here).
        self.loss(x, t)

        # Backward pass: start from the final layer, then walk the stacked
        # layers in reverse order.
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the results.
        first, second = self.layers['Affine1'], self.layers['Affine2']
        return {
            'W1': first.dW,
            'b1': first.db,
            'W2': second.dW,
            'b2': second.db,
        }

In [8]:
import numpy as np
import sys, os
sys.path.append(os.pardir)
from dataset.mnist import load_mnist
# Load the MNIST training and test splits. The flag names suggest the inputs
# are normalized and the labels one-hot encoded -- TODO confirm in load_mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(
    normalize=True,
    one_hot_label=True,
)

# Network with input size 784, hidden size 50 and output size 10.
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# hyperparameters 초기화

In [11]:
# Settings for the mini-batch training loop.
batch_size = 100                 # samples drawn per iteration
learning_rate = 0.01             # step size of each parameter update
count = 10000                    # total number of training iterations
train_size = x_train.shape[0]    # size of x_train's first axis (training samples)

# list 초기화

In [12]:
# Histories filled in by the training loop: per-iteration loss and the
# train/test accuracy recorded at each accuracy checkpoint.
train_lost_list, train_acc_list, test_acc_list = [], [], []

In [45]:
# Number of mini-batch iterations that make up one epoch (never less than 1).
# Floor division keeps this an integer, so the `i % epoch_count == 0` check in
# the training loop fires once per epoch even when train_size is not an exact
# multiple of batch_size (true division would give a fractional period).
epoch_count = max(train_size // batch_size, 1)

In [51]:
for i in range(count):
    # Mini-batch: draw `batch_size` random sample indices (np.random.choice
    # samples with replacement by default, so an index may repeat).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # Gradients of all parameters via backpropagation.
    grad = network.gradient(x_batch, t_batch)

    # Parameter update. `-=` modifies the existing arrays in place instead of
    # rebinding the dict entries.
    # NOTE(review): the Affine layers were constructed from these same arrays,
    # so they presumably rely on in-place updates -- confirm before changing.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the same batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_lost_list.append(loss)

    # Accuracy is only evaluated every `epoch_count` iterations.
    if i % epoch_count != 0:
        continue

    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print(i, train_acc, test_acc)

0 0.112033333333 0.1156
600 0.500416666667 0.5129
1200 0.782433333333 0.7875
1800 0.845183333333 0.8498
2400 0.871216666667 0.8764
3000 0.8843 0.8875
3600 0.89225 0.8946
4200 0.897266666667 0.8995
4800 0.90035 0.9026
5400 0.904 0.905
6000 0.906633333333 0.9092
6600 0.908716666667 0.9118
7200 0.910416666667 0.9126
7800 0.912633333333 0.9154
8400 0.9148 0.9165
9000 0.916966666667 0.919
9600 0.91775 0.9198


In [48]:
# Start over with a freshly initialized network (same sizes as before).
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# hyperparameters 초기화

In [49]:
# Settings for the mini-batch training loop.
count = 10000                    # total number of training iterations
train_size = x_train.shape[0]    # size of x_train's first axis (training samples)
batch_size = 100                 # samples drawn per iteration

# The training loop reads `learning_rate`, so that is the name assigned here.
learning_rate = 0.01             # step size of each parameter update
# Misspelled legacy name, kept as an alias in case another cell still uses it.
learning_late = learning_rate

# list 초기화