In [14]:
from layers import *
from utils import *

In [15]:
from collections import OrderedDict
import numpy as np
from dataset import load_mnist

In [16]:
class ThreeLayerNet:
    """Fully connected classifier with two hidden layers.

    The forward pass is Affine -> Relu -> Affine -> Sigmoid -> Affine; the
    loss is produced by a SoftmaxWithLoss layer applied to the last affine
    output.

    Attributes:
        params: dict holding the arrays 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'.
        layers: OrderedDict of the layers applied, in order, by predict().
        lastLayer: loss layer used by loss() and gradient().
        x_train: training inputs, set through set_x_train() (None until then).
        t_train: training targets, set through set_t_train() (None until then).
        train_loss_list: per-step losses recorded by the most recent train()
            call (empty until train() has been run).
    """

    def __init__(self, input_size, hidden_size, output_size, std=1e-2):
        """Create the parameters and wire up the layer stack.

        Args:
            input_size: number of input features.
            hidden_size: sequence whose first two entries are the widths of
                the first and second hidden layers.
            output_size: number of output units.
            std: scale applied to the standard-normal weight initialisation.
        """
        hidden1, hidden2 = hidden_size[0], hidden_size[1]

        # Weights are drawn in the order W1, W2, W3; biases start at zero.
        self.params = {}
        self.params['W1'] = std * np.random.randn(input_size, hidden1)
        self.params['b1'] = np.zeros(hidden1)
        self.params['W2'] = std * np.random.randn(hidden1, hidden2)
        self.params['b2'] = np.zeros(hidden2)
        self.params['W3'] = std * np.random.randn(hidden2, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Insertion order defines the forward order used by predict().
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Sigmoid2'] = Sigmoid()
        self.layers['Affine3'] = Affine(self.params['W3'], self.params['b3'])

        self.lastLayer = SoftmaxWithLoss()

        self.x_train = None
        self.t_train = None
        self.train_loss_list = []

    def predict(self, X):
        """Feed X through every layer in self.layers and return the result."""
        for layer in self.layers.values():
            X = layer.forward(X)
        return X

    def loss(self, X, T):
        """Return the loss the last layer reports for inputs X and targets T."""
        Y = self.predict(X)
        # lastLayer.forward returns a pair; only its second item (the loss)
        # is needed here.
        _, loss = self.lastLayer.forward(Y, T)
        return loss

    def gradient(self, X, T):
        """Back-propagate the loss for (X, T) and collect parameter gradients.

        Returns:
            dict with the same keys as self.params ('W1', 'b1', ..., 'b3'),
            read from the dW / db attributes of the three affine layers.
        """
        # The forward pass is run for its side effects only: presumably each
        # layer caches what its backward() needs -- TODO confirm in layers.py.
        self.loss(X, T)

        dout = self.lastLayer.backward()
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        for idx in (1, 2, 3):
            affine = self.layers['Affine%d' % idx]
            grads['W%d' % idx] = affine.dW
            grads['b%d' % idx] = affine.db
        return grads

    def set_x_train(self, x_train):
        """Store the training inputs that train() samples batches from."""
        self.x_train = x_train

    def set_t_train(self, t_train):
        """Store the training targets that train() samples batches from."""
        self.t_train = t_train

    def train(self, total_steps=10000, learning_rate=0.1, batch_size=100):
        """Run mini-batch gradient descent on the stored training data.

        Each step samples batch_size row indices with np.random.choice,
        applies one gradient-descent update to every parameter, and records
        the loss on that same batch after the update. The recorded losses are
        kept on self.train_loss_list; the method itself returns None.

        Args:
            total_steps: number of parameter updates to perform.
            learning_rate: step size multiplied into each gradient.
            batch_size: number of rows sampled per step.
        """
        train_loss_list = []
        # Kept on the instance rather than returned, so the method's return
        # value (None) is unchanged for existing callers.
        self.train_loss_list = train_loss_list

        train_size = self.x_train.shape[0]
        # Floor division keeps this an int. With true division a train_size
        # that is not a multiple of batch_size gives a fractional period and
        # the `i % iter_per_epoch == 0` check below fires at the wrong steps.
        iter_per_epoch = max(train_size // batch_size, 1)

        for i in range(total_steps):
            batch_mask = np.random.choice(train_size, batch_size)
            x_batch = self.x_train[batch_mask]
            t_batch = self.t_train[batch_mask]

            grad = self.gradient(x_batch, t_batch)

            for key in grad:
                # In-place update on purpose: the same array objects were
                # handed to the Affine layers in __init__ (presumably held by
                # reference there -- TODO confirm), so rebinding would
                # disconnect them.
                self.params[key] -= learning_rate * grad[key]

            loss = self.loss(x_batch, t_batch)
            train_loss_list.append(loss)

            # Printing is optional: one progress line per epoch.
            if i % iter_per_epoch == 0:
                print('iteration: ', i, 'loss: ', loss)

In [17]:
# Seed NumPy's global RNG before the network is built so that the random
# weight initialisation (np.random.randn) and the mini-batch sampling in
# train() (np.random.choice) are repeatable when the notebook is re-run from
# a fresh kernel.
SEED = 42
np.random.seed(SEED)

# load_mnist is called with normalisation and one-hot labels switched on; its
# result is unpacked as a (train, test) pair of (inputs, targets) tuples.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# 784 inputs -> hidden layers of 100 and 50 units -> 10 outputs.
nn = ThreeLayerNet(input_size=784, hidden_size=[100, 50], output_size=10)

In [23]:
# Attach the training split to the network; train() reads it from the instance.
nn.set_x_train(x_train)
nn.set_t_train(t_train)

# Optimisation settings passed to nn.train() in the following cell.
learning_rate = 0.01
batch_size = 100
total_steps = 10000

In [None]:
# Run mini-batch training with the settings chosen above; progress lines are
# printed by train() itself.
nn.train(
    total_steps=total_steps,
    learning_rate=learning_rate,
    batch_size=batch_size,
)

iteration:  0 loss:  0.01344703218654704
iteration:  600 loss:  0.15616440401706932
iteration:  1200 loss:  0.010323745371247844
iteration:  1800 loss:  0.009437221813866829
iteration:  2400 loss:  0.022616833131692906
iteration:  3000 loss:  0.07623923021737138
iteration:  3600 loss:  0.02479571531372056
iteration:  4200 loss:  0.012289480786286732
iteration:  4800 loss:  0.010589621813173394
iteration:  5400 loss:  0.008140212785947624
iteration:  6000 loss:  0.009754762245951768
iteration:  6600 loss:  0.017610982555187695
iteration:  7200 loss:  0.02033651681206389
iteration:  7800 loss:  0.014214597963067399
iteration:  8400 loss:  0.007869395060931648
iteration:  9000 loss:  0.006932206852750481
iteration:  9600 loss:  0.009876768476993276
