In [1]:
import sys
import os
# Make modules in the parent directory importable. The membership check keeps
# this cell idempotent: re-running it no longer appends duplicate entries.
if os.pardir not in sys.path:
    sys.path.append(os.pardir)

In [2]:
# coding: utf-8
from common.gradient import numerical_gradient
from common.layers import *
from collections import OrderedDict
import numpy as np


class AdvancedMultiLayerNet:
    """Fully connected network with optional BatchNorm, Dropout and L2 weight decay.

    Every hidden layer is assembled as
    ``Affine -> [BatchNormalization] -> Relu -> [Dropout]`` and the stack ends
    with one more ``Affine`` layer whose output is fed to ``SoftmaxWithLoss``.
    The layer classes are not defined in this class (presumably they are
    provided by ``common.layers`` -- confirm against the imports).

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden_size_list : sequence of int
        Width of each hidden layer. Any sequence is accepted (list, tuple,
        ...); it is copied into a list so the caller's object is not aliased.
    output_size : int
        Number of output units.
    weight_decay_lambda : float, default 0
        Coefficient of the L2 penalty added to the loss and to the weight
        gradients.
    use_dropout : bool, default False
        Insert a ``Dropout`` layer after every hidden activation.
    dropout_ration : float, default 0.5
        Forwarded unchanged to ``Dropout(...)``. The spelling is kept for
        backward compatibility with existing callers.
    use_batchnorm : bool, default False
        Insert a ``BatchNormalization`` layer after every hidden ``Affine``.
    """

    def __init__(self, input_size, hidden_size_list, output_size, weight_decay_lambda=0,
                 use_dropout=False, dropout_ration=0.5, use_batchnorm=False):

        self.input_size = input_size
        self.output_size = output_size
        # Copy into a list: list concatenation below would raise TypeError for
        # a tuple, and a private copy cannot be mutated behind our back.
        self.hidden_size_list = list(hidden_size_list)
        self.hidden_layer_num = len(self.hidden_size_list)
        self.use_dropout = use_dropout
        self.weight_decay_lambda = weight_decay_lambda
        self.use_batchnorm = use_batchnorm
        self.params = {}

        # Weight initialisation: std = sqrt(2 / fan_in) for every Affine layer.
        all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
        layer_shapes = zip(all_size_list[:-1], all_size_list[1:])
        for idx, (fan_in, fan_out) in enumerate(layer_shapes, start=1):
            scale = np.sqrt(2.0 / fan_in)
            self.params[f"W{idx}"] = scale * np.random.randn(fan_in, fan_out)
            self.params[f"b{idx}"] = np.zeros(fan_out)

        # Layer construction. Layers receive the very same array objects that
        # are stored in self.params, so in-place parameter updates are seen by
        # the layers as well.
        self.layers = OrderedDict()
        for idx, width in enumerate(self.hidden_size_list, start=1):
            self.layers[f'Affine{idx}'] = Affine(
                self.params[f"W{idx}"], self.params[f"b{idx}"])

            if self.use_batchnorm:
                self.params[f'gamma{idx}'] = np.ones(width)
                self.params[f'beta{idx}'] = np.zeros(width)
                self.layers[f'BatchNorm{idx}'] = BatchNormalization(
                    self.params[f'gamma{idx}'], self.params[f'beta{idx}'])

            self.layers[f'Activation_function{idx}'] = Relu()

            if self.use_dropout:
                self.layers[f'Dropout{idx}'] = Dropout(dropout_ration)

        # Output Affine layer (no BatchNorm / activation / Dropout after it).
        idx = self.hidden_layer_num + 1
        self.layers[f'Affine{idx}'] = Affine(
            self.params[f'W{idx}'], self.params[f"b{idx}"])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):
        """Run ``x`` through every layer in order and return the final output.

        ``train_flg`` is forwarded only to layers whose key contains
        ``"Dropout"`` or ``"BatchNorm"``; all other layers are called with
        ``forward(x)`` alone.
        """
        for key, layer in self.layers.items():
            if "Dropout" in key or "BatchNorm" in key:
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)

        return x

    def loss(self, x, t, train_flg=False):
        """Return ``last_layer.forward(predict(x), t)`` plus the L2 penalty.

        The penalty is ``0.5 * weight_decay_lambda * sum(W**2)`` accumulated
        over every weight matrix ``W1 .. W{hidden_layer_num + 1}``.
        """
        y = self.predict(x, train_flg)

        weight_decay = 0
        for idx in range(1, self.hidden_layer_num + 2):
            W = self.params[f"W{idx}"]
            weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)

        return self.last_layer.forward(y, t) + weight_decay

    def accuracy(self, X, T):
        """Return the fraction of rows of ``X`` whose argmax prediction matches ``T``.

        ``T`` may be a 1-D array of class indices or a multi-dimensional array,
        which is reduced with ``argmax(axis=1)``. Prediction runs with
        ``train_flg=False``.
        """
        Y = self.predict(X, train_flg=False)
        Y = np.argmax(Y, axis=1)
        if T.ndim != 1:
            T = np.argmax(T, axis=1)

        accuracy = np.sum(Y == T) / float(X.shape[0])
        return accuracy

    def gradient(self, x, t):
        """Compute parameter gradients by backpropagation.

        Runs a forward pass with ``train_flg=True``, back-propagates through
        ``last_layer`` and then through the layers in reverse order, and
        returns a dict with ``W{i}`` / ``b{i}`` entries (plus ``gamma{i}`` /
        ``beta{i}`` for hidden layers when batch normalisation is enabled).
        The weight gradients include the ``weight_decay_lambda * W`` term.
        """
        # forward
        self.loss(x, t, train_flg=True)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients the layers stored during backward().
        grads = {}
        last_idx = self.hidden_layer_num + 1
        for idx in range(1, last_idx + 1):
            affine = self.layers[f'Affine{idx}']
            grads[f"W{idx}"] = affine.dW + \
                self.weight_decay_lambda * self.params[f"W{idx}"]
            grads[f"b{idx}"] = affine.db

            # The output layer has no BatchNorm layer attached.
            if self.use_batchnorm and idx != last_idx:
                batchnorm = self.layers[f'BatchNorm{idx}']
                grads[f'gamma{idx}'] = batchnorm.dgamma
                grads[f'beta{idx}'] = batchnorm.dbeta

        return grads

In [3]:
from common.optimizer import Adam

# Hyperparameter configuration.

# -- reporting --
verbose = True

# -- training schedule --
# NOTE: the training loop iterates `range(epochs)` and performs one mini-batch
# update per pass, so `epochs` is effectively the number of iterations.
batch_size = 1_280
epochs = 10_000
iter_per_epoch = 1_000  # evaluation / logging interval, in loop iterations

# -- model architecture and regularisation --
hidden_size_list = [15, 14, 14, 12]
use_dropout, dropout_ratio = True, 0.016

# -- optimiser --
optimizer = Adam(lr=0.0016)

In [4]:
from dataset.mnist import load_mnist
import matplotlib.pyplot as plt
import numpy as np
from common.trainer import Trainer

# Load the data set (normalised inputs, one-hot labels).
(x_train, t_train), (x_test, t_test) = load_mnist(one_hot_label=True, normalize=True)

# Build the network from the hyperparameters configured earlier.
network = AdvancedMultiLayerNet(
    input_size=784,
    hidden_size_list=hidden_size_list,
    output_size=10,
    weight_decay_lambda=0.01,
    use_dropout=use_dropout,
    dropout_ration=dropout_ratio,
    use_batchnorm=True,
)


train_size = x_train.shape[0]

# Progress counters and per-step / per-evaluation histories.
current_iter = current_epoch = 0
train_loss_list, train_acc_list, test_acc_list = [], [], []

In [6]:
for epoch in range(epochs):
    # Sample a mini-batch; np.random.choice draws indices with replacement by default.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # Back-propagate, then let the optimizer update the parameters.
    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    # Record the loss on the same batch after the update (default train_flg).
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Accuracy is only evaluated every `iter_per_epoch` iterations.
    if epoch % iter_per_epoch != 0:
        continue

    x_train_sample, t_train_sample = x_train, t_train
    x_test_sample, t_test_sample = x_test, t_test

    train_acc = network.accuracy(x_train_sample, t_train_sample)
    test_acc = network.accuracy(x_test_sample, t_test_sample)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)

    if verbose:
        print(f"=== epoch:{round(epoch, 5)!s}, train acc:{round(train_acc, 5)!s}, "
              f"test acc:{round(test_acc, 5)!s} ===")


# Final evaluation on the full test set.
test_acc = network.accuracy(x_test, t_test)

if verbose:
    print("=============== Final Test Accuracy ===============")
    print(f"test acc:{test_acc!s}")

=== epoch:0, train acc:0.1726, test acc:0.1649 ===
=== epoch:1000, train acc:0.9598, test acc:0.9489 ===
=== epoch:2000, train acc:0.96238, test acc:0.9525 ===
=== epoch:3000, train acc:0.95802, test acc:0.9498 ===
=== epoch:4000, train acc:0.96802, test acc:0.9558 ===
=== epoch:5000, train acc:0.96793, test acc:0.9555 ===
=== epoch:6000, train acc:0.9655, test acc:0.9571 ===
=== epoch:7000, train acc:0.9642, test acc:0.9529 ===
=== epoch:8000, train acc:0.96575, test acc:0.9531 ===
=== epoch:9000, train acc:0.96962, test acc:0.9588 ===
test acc:0.9588
