In [2]:
import sys
import os
sys.path.append(os.pardir)  # make modules in the parent directory importable

In [3]:
# coding: utf-8
from common.layers import *
from collections import OrderedDict
from datetime import datetime
import numpy as np
from dataset.mnist import load_mnist
from common.optimizer import Adam
from tqdm import tqdm
import pandas as pd

# Seed NumPy's global RNG so weight initialisation and mini-batch sampling
# are reproducible from run to run.
np.random.seed(42)

# Every execution logs into its own directory named after the start time.
log_path = 'log_' + datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# exist_ok=True already tolerates an existing directory, so no separate
# (race-prone) os.path.exists() check is needed beforehand.
os.makedirs(log_path, exist_ok=True)


class AdvancedMultiLayerNet:
    """Fully connected network with optional BatchNorm, Dropout and L2 weight decay.

    Each hidden stage is built as ``Affine -> [BatchNormalization] -> Relu ->
    [Dropout]``; a final ``Affine`` produces the output scores and
    ``SoftmaxWithLoss`` is used as the loss layer.  The layer classes come
    from ``common.layers``.

    All trainable arrays live in ``self.params`` under the keys ``W{i}`` /
    ``b{i}`` (and ``gamma{i}`` / ``beta{i}`` when batch normalisation is on),
    with ``i`` starting at 1.
    """

    def __init__(self, input_size, hidden_size_list, output_size, weight_decay_lambda=0,
                 use_dropout=False, dropout_ration=0.5, use_batchnorm=False):
        """Create the parameters and assemble the layer stack.

        Args:
            input_size: Number of input features.
            hidden_size_list: List with the width of every hidden layer.
            output_size: Number of output units.
            weight_decay_lambda: Coefficient of the L2 penalty on the weights.
            use_dropout: Whether a Dropout layer follows each activation.
            dropout_ration: Value handed to every Dropout layer.
            use_batchnorm: Whether a BatchNormalization layer follows each
                hidden Affine layer.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.use_dropout = use_dropout
        self.weight_decay_lambda = weight_decay_lambda
        self.use_batchnorm = use_batchnorm
        self.params = {}

        # Weight initialisation: Gaussian scaled by sqrt(2 / fan_in), zero biases.
        layer_sizes = [self.input_size] + self.hidden_size_list + [self.output_size]
        for n, (fan_in, fan_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:]), start=1):
            self.params[f"W{n}"] = np.sqrt(2.0 / fan_in) * np.random.randn(fan_in, fan_out)
            self.params[f"b{n}"] = np.zeros(fan_out)

        # Layer construction: one stage per hidden layer.
        self.layers = OrderedDict()
        for n, width in enumerate(hidden_size_list, start=1):
            self.layers[f'Affine{n}'] = Affine(self.params[f"W{n}"], self.params[f"b{n}"])

            if self.use_batchnorm:
                self.params[f'gamma{n}'] = np.ones(width)
                self.params[f'beta{n}'] = np.zeros(width)
                self.layers[f'BatchNorm{n}'] = BatchNormalization(
                    self.params[f'gamma{n}'], self.params[f'beta{n}'])

            self.layers[f'Activation_function{n}'] = Relu()

            if self.use_dropout:
                self.layers[f'Dropout{n}'] = Dropout(dropout_ration)

        # Output stage: a plain Affine layer followed by the loss layer.
        out_idx = self.hidden_layer_num + 1
        self.layers[f'Affine{out_idx}'] = Affine(
            self.params[f'W{out_idx}'], self.params[f"b{out_idx}"])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):
        """Run ``x`` through every layer in order and return the final output.

        ``train_flg`` is forwarded only to layers whose key contains
        ``"Dropout"`` or ``"BatchNorm"``; all other layers get ``x`` alone.
        """
        out = x
        for name, layer in self.layers.items():
            wants_flag = "Dropout" in name or "BatchNorm" in name
            out = layer.forward(out, train_flg) if wants_flag else layer.forward(out)
        return out

    def loss(self, x, t, train_flg=False):
        """Return the loss-layer value for ``(x, t)`` plus the L2 weight penalty.

        The penalty is ``0.5 * weight_decay_lambda * sum(W**2)`` accumulated
        over every weight matrix ``W{i}``.
        """
        scores = self.predict(x, train_flg)

        penalty = sum(
            0.5 * self.weight_decay_lambda * np.sum(self.params[f"W{n}"]**2)
            for n in range(1, self.hidden_layer_num + 2)
        )

        return self.last_layer.forward(scores, t) + penalty

    def accuracy(self, X, T):
        """Return the fraction of rows of ``X`` whose arg-max prediction equals the label.

        ``T`` may be a 1-D array of class indices; any other rank is reduced
        with ``argmax`` along axis 1 (e.g. one-hot rows).
        """
        predicted = np.argmax(self.predict(X, train_flg=False), axis=1)
        labels = T if T.ndim == 1 else np.argmax(T, axis=1)
        return np.sum(predicted == labels) / float(X.shape[0])

    def gradient(self, x, t):
        """Compute parameter gradients with one forward and one backward pass.

        Returns:
            dict keyed like ``self.params``: ``W{i}`` (including the weight
            decay term), ``b{i}`` and, when batch normalisation is enabled,
            ``gamma{i}`` / ``beta{i}`` for the hidden layers.
        """
        # Forward pass in training mode so the layers hold the values that
        # their backward() calls rely on.
        self.loss(x, t, train_flg=True)

        # Backward pass: loss layer first, then the stack in reverse order.
        dout = self.last_layer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients stored on the layers.
        grads = {}
        out_idx = self.hidden_layer_num + 1
        for n in range(1, out_idx + 1):
            affine = self.layers[f'Affine{n}']
            grads[f"W{n}"] = affine.dW + self.weight_decay_lambda * self.params[f"W{n}"]
            grads[f"b{n}"] = affine.db

            # The output Affine layer has no BatchNorm companion.
            if self.use_batchnorm and n != out_idx:
                batch_norm = self.layers[f'BatchNorm{n}']
                grads[f'gamma{n}'] = batch_norm.dgamma
                grads[f'beta{n}'] = batch_norm.dbeta

        return grads

ModuleNotFoundError: No module named 'tqdm'

In [3]:
# Fixed settings shared by every training run.
verbose = False
use_dropout = True
epochs = 10_000          # number of mini-batch updates performed by train()
iter_per_epoch = 1_000   # train() evaluates accuracy every this many updates

# Tunable hyperparameters: the grid search tries every combination.
batch_size_list = [50, 100, 150, 200]
lr_list = [0.001, 0.005, 0.01, 0.015, 0.018]
hidden_size_list_list = [
    [10],
    [10, 10],
    [8, 12],
    [10, 10, 10],
    [7, 9, 8],
    [10, 10, 10, 10],
    [6, 7, 8, 9],
]
dropout_ratio_list = [0.01, 0.05, 0.1, 0.15]

In [4]:
# Load the MNIST train/test splits with the normalize and one-hot-label
# options of load_mnist switched on.
(x_train, t_train), (x_test, t_test) = load_mnist(one_hot_label=True,
                                                  normalize=True)
# Length of the first axis of the training inputs; train() samples
# mini-batch indices from range(train_size).
train_size = x_train.shape[0]


def train(batch_size, lr, hidden_size_list, dropout_ratio):
    """Train one network configuration and write its learning curve to a CSV file.

    Besides its arguments the function reads the module-level settings
    ``use_dropout``, ``epochs``, ``iter_per_epoch``, ``verbose`` and
    ``log_path`` as well as the loaded ``x_train`` / ``t_train`` /
    ``x_test`` / ``t_test`` arrays and ``train_size``.

    Args:
        batch_size: Number of samples drawn for every update step.
        lr: Learning rate handed to the Adam optimizer.
        hidden_size_list: Hidden layer widths of the network.
        dropout_ratio: Value handed to the network's Dropout layers.

    Returns:
        Tuple ``(test_acc, log_file_name)``: the accuracy on the test set
        after the last update and the path of the CSV written for this run.
    """
    optimizer = Adam(lr=lr)
    # One entry is appended to each list at every evaluation point.
    history = {"train_loss": [], "train_acc": [], "test_acc": []}
    # Losses collected since the previous evaluation point.
    pending_losses = []

    network = AdvancedMultiLayerNet(input_size=784, hidden_size_list=hidden_size_list,
                                    output_size=10, use_dropout=use_dropout,
                                    dropout_ration=dropout_ratio,
                                    weight_decay_lambda=0.01, use_batchnorm=True)

    for step in tqdm(range(epochs)):
        # Draw a random mini-batch of indices.
        picked = np.random.choice(train_size, batch_size)
        x_batch, t_batch = x_train[picked], t_train[picked]

        optimizer.update(network.params, network.gradient(x_batch, t_batch))

        # Loss on the same batch, measured after the parameter update.
        pending_losses.append(network.loss(x_batch, t_batch))

        if step % iter_per_epoch != 0:
            continue

        # Evaluation point: accuracy on the full train and test sets.
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        history["train_acc"].append(train_acc)
        history["test_acc"].append(test_acc)
        history["train_loss"].append(np.mean(pending_losses))
        pending_losses = []

        if verbose:
            print(f"=== epoch:{round(step, 5)!s}, train acc:{round(train_acc, 5)!s}"
                  f", test acc:{round(test_acc, 5)!s} ===")

    test_acc = network.accuracy(x_test, t_test)

    # Persist step index, train loss, train accuracy and test accuracy as CSV.
    curve = pd.DataFrame({"epoch": list(range(0, epochs, iter_per_epoch)), **history})
    log_file_name = f"{log_path}/{datetime.now():%H-%M-%S}.csv"
    curve.to_csv(log_file_name, index=False)

    if verbose:
        print("=============== Final Test Accuracy ===============")
        print(f"test acc:{test_acc!s}")

    return test_acc, log_file_name

In [5]:
# Exhaustive grid search: train once per hyperparameter combination and keep
# (batch_size, lr, hidden sizes as text, dropout_ratio, log file, test accuracy).
result_list = []
search_grid = [
    (bs, rate, hidden, drop)
    for bs in batch_size_list
    for rate in lr_list
    for hidden in hidden_size_list_list
    for drop in dropout_ratio_list
]
for batch_size, lr, hidden_size_list, dropout_ratio in search_grid:
    print(f"batch_size: {batch_size}, lr: {lr}, hidden_size_list: "
          f"{hidden_size_list}, dropout_ratio: {dropout_ratio}")
    test_acc, log_file_name = train(batch_size, lr, hidden_size_list, dropout_ratio)
    result_list.append(
        (batch_size, lr, str(hidden_size_list), dropout_ratio, log_file_name, test_acc))
    print(f"test_acc: {test_acc}, log_file_name: {log_file_name}\n")

# The winning combination is the one with the highest final test accuracy
# (last element of each result tuple).
best_combination = max(result_list, key=lambda row: row[-1])
best_batch, best_lr, best_hidden, best_dropout, best_log, best_acc = best_combination
print("=============== Best Result ===============")
print(f"batch_size: {best_batch}, lr: {best_lr}, hidden_size_list: "
      f"{best_hidden}, dropout_ratio: {best_dropout}\n"
      f"test_acc: {best_acc}, "
      f"log_file_name: {best_log}\n")

batch_size: 10, lr: 0.0016, hidden_size_list: [15, 14, 14, 12], dropout_ratio: 0.016


100%|██████████| 10000/10000 [00:09<00:00, 1109.99it/s]


test_acc: 0.9046, log_file_name: log_2024-05-11_11-29-21/11-29-30.csv

batch_size: 10, lr: 0.0016, hidden_size_list: [15, 14, 14, 12], dropout_ratio: 0.008


100%|██████████| 10000/10000 [00:09<00:00, 1093.61it/s]


test_acc: 0.9165, log_file_name: log_2024-05-11_11-29-21/11-29-39.csv

batch_size: 20, lr: 0.0016, hidden_size_list: [15, 14, 14, 12], dropout_ratio: 0.016


100%|██████████| 10000/10000 [00:09<00:00, 1001.35it/s]


test_acc: 0.9322, log_file_name: log_2024-05-11_11-29-21/11-29-49.csv

batch_size: 20, lr: 0.0016, hidden_size_list: [15, 14, 14, 12], dropout_ratio: 0.008


100%|██████████| 10000/10000 [00:09<00:00, 1065.17it/s]

test_acc: 0.9287, log_file_name: log_2024-05-11_11-29-21/11-29-59.csv

batch_size: 20, lr: 0.0016, hidden_size_list: [15, 14, 14, 12], dropout_ratio: 0.016
test_acc: 0.9322, log_file_name: log_2024-05-11_11-29-21/11-29-49.csv




