## 1. mlpmodel.ipynb(객체형)

In [48]:
import numpy as np

np.random.seed(1234)
def randomize(): np.random.seed(time.time())
class Model(object):
    def __init__(self, name, dataset):
        self.name = name
        self.dataset = dataset
        self.is_training = False
        if not hasattr(self, 'rand_std'): self.rand_std = 0.030

    def __str__(self):
        return '{}/{}'.format(self.name, self.dataset)

    def exec_all(self, epoch_count=10, batch_size=10, learning_rate=0.01,
                 report=0, show_cnt=3):
        self.train(epoch_count, batch_size, learning_rate, report)
        self.test()
        if show_cnt > 0: self.visualize(show_cnt)
class MlpModel(Model):
    def __init__(self, name, dataset, hconfigs):
        super(MlpModel, self).__init__(name, dataset)
        self.init_parameters(hconfigs)
def mlp_init_parameters(self, hconfigs):
    self.hconfigs = hconfigs
    self.pm_hiddens = []

    prev_shape = self.dataset.input_shape

    for hconfig in hconfigs:
        pm_hidden, prev_shape = self.alloc_layer_param(prev_shape, hconfig)
        self.pm_hiddens.append(pm_hidden)

    output_cnt = int(np.prod(self.dataset.output_shape))
    self.pm_output, _ = self.alloc_layer_param(prev_shape, output_cnt)

def mlp_alloc_layer_param(self, input_shape, hconfig):
    input_cnt = np.prod(input_shape)
    output_cnt = hconfig

    weight, bias = self.alloc_param_pair([input_cnt, output_cnt])

    return {'w':weight, 'b':bias}, output_cnt

def mlp_alloc_param_pair(self, shape):
    weight = np.random.normal(0, self.rand_std, shape)
    bias = np.zeros([shape[-1]])
    return weight, bias

MlpModel.init_parameters = mlp_init_parameters
MlpModel.alloc_layer_param = mlp_alloc_layer_param
MlpModel.alloc_param_pair = mlp_alloc_param_pair
def mlp_model_train(self, epoch_count=10, batch_size=10, \
                    learning_rate=0.001, report=0):
    self.learning_rate = learning_rate

    batch_count = int(self.dataset.train_count / batch_size)
    time1 = time2 = int(time.time())
    if report != 0:
        print('Model {} train started:'.format(self.name))

    for epoch in range(epoch_count):
        costs = []
        accs = []
        self.dataset.shuffle_train_data(batch_size*batch_count)
        for n in range(batch_count):
            trX, trY = self.dataset.get_train_data(batch_size, n)
            cost, acc = self.train_step(trX, trY)
            costs.append(cost)
            accs.append(acc)

        if report > 0 and (epoch+1) % report == 0:
            vaX, vaY = self.dataset.get_validate_data(100)
            acc = self.eval_accuracy(vaX, vaY)
            time3 = int(time.time())
            tm1, tm2 = time3-time2, time3-time1
            self.dataset.train_prt_result(epoch+1, costs, accs, acc, tm1, tm2)
            time2 = time3

    tm_total = int(time.time()) - time1
    print('Model {} train ended in {} secs:'.format(self.name, tm_total))

MlpModel.train = mlp_model_train
def mlp_model_test(self):
    teX, teY = self.dataset.get_test_data()
    time1 = int(time.time())
    acc = self.eval_accuracy(teX, teY)
    time2 = int(time.time())
    self.dataset.test_prt_result(self.name, acc, time2-time1)

MlpModel.test = mlp_model_test
def mlp_model_visualize(self, num):
    print('Model {} Visualization'.format(self.name))
    deX, deY = self.dataset.get_visualize_data(num)
    est = self.get_estimate(deX)
    self.dataset.visualize(deX, est, deY)

MlpModel.visualize = mlp_model_visualize
def mlp_train_step(self, x, y):
    self.is_training = True

    output, aux_nn = self.forward_neuralnet(x)
    loss, aux_pp = self.forward_postproc(output, y)
    accuracy = self.eval_accuracy(x, y, output)

    G_loss = 1.0
    G_output = self.backprop_postproc(G_loss, aux_pp)
    self.backprop_neuralnet(G_output, aux_nn)

    self.is_training = False

    return loss, accuracy

MlpModel.train_step = mlp_train_step
def mlp_forward_neuralnet(self, x):
    hidden = x
    aux_layers = []

    for n, hconfig in enumerate(self.hconfigs):
        hidden, aux = self.forward_layer(hidden, hconfig, self.pm_hiddens[n])
        aux_layers.append(aux)

    output, aux_out = self.forward_layer(hidden, None, self.pm_output)

    return output, [aux_out, aux_layers]

def mlp_backprop_neuralnet(self, G_output, aux):
    aux_out, aux_layers = aux

    G_hidden = self.backprop_layer(G_output, None, self.pm_output, aux_out)

    for n in reversed(range(len(self.hconfigs))):
        hconfig, pm, aux = self.hconfigs[n], self.pm_hiddens[n], aux_layers[n]
        G_hidden = self.backprop_layer(G_hidden, hconfig, pm, aux)

    return G_hidden

MlpModel.forward_neuralnet = mlp_forward_neuralnet
MlpModel.backprop_neuralnet = mlp_backprop_neuralnet
def mlp_forward_layer(self, x, hconfig, pm):
    y = np.matmul(x, pm['w']) + pm['b']
    if hconfig is not None: y = relu(y)
    return y, [x,y]

def mlp_backprop_layer(self, G_y, hconfig, pm, aux):
    x, y = aux

    if hconfig is not None: G_y = relu_derv(y) * G_y

    g_y_weight = x.transpose()
    g_y_input = pm['w'].transpose()

    G_weight = np.matmul(g_y_weight, G_y)
    G_bias = np.sum(G_y, axis=0)
    G_input = np.matmul(G_y, g_y_input)

    pm['w'] -= self.learning_rate * G_weight
    pm['b'] -= self.learning_rate * G_bias

    return G_input

MlpModel.forward_layer = mlp_forward_layer
MlpModel.backprop_layer = mlp_backprop_layer
def mlp_forward_postproc(self, output, y):
    loss, aux_loss = self.dataset.forward_postproc(output, y)
    extra, aux_extra = self.forward_extra_cost(y)
    return loss + extra, [aux_loss, aux_extra]

def mlp_forward_extra_cost(self, y):
    return 0, None

MlpModel.forward_postproc = mlp_forward_postproc
MlpModel.forward_extra_cost = mlp_forward_extra_cost
def mlp_backprop_postproc(self, G_loss, aux):
    aux_loss, aux_extra = aux
    self.backprop_extra_cost(G_loss, aux_extra)
    G_output = self.dataset.backprop_postproc(G_loss, aux_loss)
    return G_output

def mlp_backprop_extra_cost(self, G_loss, aux):
    pass

MlpModel.backprop_postproc = mlp_backprop_postproc
MlpModel.backprop_extra_cost = mlp_backprop_extra_cost
def mlp_eval_accuracy(self, x, y, output=None):
    if output is None:
        output, _ = self.forward_neuralnet(x)
    accuracy = self.dataset.eval_accuracy(x, y, output)
    return accuracy

MlpModel.eval_accuracy = mlp_eval_accuracy
def mlp_get_estimate(self, x):
    output, _ = self.forward_neuralnet(x)
    estimate = self.dataset.get_estimate(output)
    return estimate

MlpModel.get_estimate = mlp_get_estimate

In [64]:
import numpy as np
import time
import os
import csv
import copy
import wave
import cv2
import matplotlib.pyplot as plt
from PIL import Image
from IPython.core.display import HTML

np.random.seed(1234)

def randomize():
    np.random.seed(time.time())

class Model(object):
    def __init__(self, name, dataset):
        self.name = name
        self.dataset = dataset
        self.is_training = False
        if not hasattr(self, 'rand_std'):
            self.rand_std = 0.030

    def __str__(self):
        return '{}/{}'.format(self.name, self.dataset)

    def exec_all(self, epoch_count=10, batch_size=10, learning_rate=0.01, report=0, show_cnt=3):
        self.train(epoch_count, batch_size, learning_rate, report)
        self.test()
        if show_cnt > 0:
            self.visualize(show_cnt)

class MlpModel(Model):
    def __init__(self, name, dataset, hconfigs, dropout_p=None):
        super(MlpModel, self).__init__(name, dataset)
        self.dropout_p = dropout_p
        self.init_parameters(hconfigs)

    def init_parameters(self, hconfigs):
        self.hconfigs = hconfigs
        self.pm_hiddens = []
        prev_shape = self.dataset.input_shape
        for hconfig in hconfigs:
            pm_hidden, prev_shape = self.alloc_layer_param(prev_shape, hconfig)
            self.pm_hiddens.append(pm_hidden)
        output_cnt = int(np.prod(self.dataset.output_shape))
        self.pm_output, _ = self.alloc_layer_param(prev_shape, output_cnt)

    def alloc_layer_param(self, input_shape, hconfig):
        input_cnt = np.prod(input_shape)
        output_cnt = hconfig
        weight, bias = self.alloc_param_pair([input_cnt, output_cnt])
        return {'w': weight, 'b': bias}, output_cnt

    def alloc_param_pair(self, shape):
        weight = np.random.normal(0, self.rand_std, shape)
        bias = np.zeros([shape[-1]])
        return weight, bias

    def train(self, epoch_count=10, batch_size=10, learning_rate=0.001, report=0):
        self.learning_rate = learning_rate
        batch_count = int(self.dataset.train_count / batch_size)
        time1 = time2 = int(time.time())
        if report != 0:
            print('Model {} train started:'.format(self.name))
        for epoch in range(epoch_count):
            costs = []
            accs = []
            self.dataset.shuffle_train_data(batch_size * batch_count)
            for n in range(batch_count):
                trX, trY = self.dataset.get_train_data(batch_size, n)
                cost, acc = self.train_step(trX, trY)
                costs.append(cost)
                accs.append(acc)
            if report > 0 and (epoch + 1) % report == 0:
                vaX, vaY = self.dataset.get_validate_data(100)
                acc = self.eval_accuracy(vaX, vaY)
                time3 = int(time.time())
                tm1, tm2 = time3 - time2, time3 - time1
                self.dataset.train_prt_result(epoch + 1, costs, accs, acc, tm1, tm2)
                time2 = time3
        tm_total = int(time.time()) - time1
        print('Model {} train ended in {} secs:'.format(self.name, tm_total))

    def test(self):
        teX, teY = self.dataset.get_test_data()
        time1 = int(time.time())
        acc = self.eval_accuracy(teX, teY)
        time2 = int(time.time())
        self.dataset.test_prt_result(self.name, acc, time2 - time1)

    def visualize(self, num):
        print('Model {} Visualization'.format(self.name))
        deX, deY = self.dataset.get_visualize_data(num)
        est = self.get_estimate(deX)
        self.dataset.visualize(deX, est, deY)

    def train_step(self, x, y):
        self.is_training = True
        output, aux_nn = self.forward_neuralnet(x)
        loss, aux_pp = self.forward_postproc(output, y)
        accuracy = self.eval_accuracy(x, y, output)
        G_loss = 1.0
        G_output = self.backprop_postproc(G_loss, aux_pp)
        self.backprop_neuralnet(G_output, aux_nn)
        self.is_training = False
        return loss, accuracy

    def forward_neuralnet(self, x):
        hidden = x
        aux_layers = []
        for n, hconfig in enumerate(self.hconfigs):
            hidden, aux = self.forward_layer(hidden, hconfig, self.pm_hiddens[n])
            aux_layers.append(aux)
        output, aux_out = self.forward_layer(hidden, None, self.pm_output)
        return output, [aux_out, aux_layers]

    def backprop_neuralnet(self, G_output, aux):
        aux_out, aux_layers = aux
        G_hidden = self.backprop_layer(G_output, None, self.pm_output, aux_out)
        for n in reversed(range(len(self.hconfigs))):
            hconfig, pm, aux = self.hconfigs[n], self.pm_hiddens[n], aux_layers[n]
            G_hidden = self.backprop_layer(G_hidden, hconfig, pm, aux)
        return G_hidden

    def forward_layer(self, x, hconfig, pm):
        y = np.matmul(x, pm['w']) + pm['b']
        if hconfig is not None:
            y = relu(y)
            if self.dropout_p is not None and self.is_training:
                dropout_mask = np.random.binomial(1, self.dropout_p, size=y.shape) / self.dropout_p
                y *= dropout_mask
                return y, [x, y, dropout_mask]
            return y, [x, y]
        return y, [x, y]

    def backprop_layer(self, G_y, hconfig, pm, aux):
        if self.dropout_p is not None and self.is_training and hconfig is not None:
            x, y, dropout_mask = aux
            G_y *= dropout_mask
        else:
            x, y = aux
        if hconfig is not None:
            G_y = relu_derv(y) * G_y
        g_y_weight = x.transpose()
        g_y_input = pm['w'].transpose()
        G_weight = np.matmul(g_y_weight, G_y)
        G_bias = np.sum(G_y, axis=0)
        G_input = np.matmul(G_y, g_y_input)
        pm['w'] -= self.learning_rate * G_weight
        pm['b'] -= self.learning_rate * G_bias
        return G_input

    def forward_postproc(self, output, y):
        loss, aux_loss = self.dataset.forward_postproc(output, y)
        extra, aux_extra = self.forward_extra_cost(y)
        return loss + extra, [aux_loss, aux_extra]

    def forward_extra_cost(self, y):
        return 0, None

    def backprop_postproc(self, G_loss, aux):
        aux_loss, aux_extra = aux
        self.backprop_extra_cost(G_loss, aux_extra)
        G_output = self.dataset.backprop_postproc(G_loss, aux_loss)
        return G_output

    def backprop_extra_cost(self, G_loss, aux):
        pass

    def eval_accuracy(self, x, y, output=None):
        if output is None:
            output, _ = self.forward_neuralnet(x)
        accuracy = self.dataset.eval_accuracy(x, y, output)
        return accuracy

    def get_estimate(self, x):
        output, _ = self.forward_neuralnet(x)
        estimate = self.dataset.get_estimate(output)
        return estimate

class AdamModel(MlpModel):
    def __init__(self, name, dataset, hconfigs, dropout_p=None):
        self.use_adam = True
        super(AdamModel, self).__init__(name, dataset, hconfigs, dropout_p)

    def backprop_layer(self, G_y, hconfig, pm, aux):
        if self.dropout_p is not None and self.is_training and hconfig is not None:
            x, y, dropout_mask = aux
            G_y *= dropout_mask
        else:
            x, y = aux
        if hconfig is not None:
            G_y = relu_derv(y) * G_y
        g_y_weight = x.transpose()
        g_y_input = pm['w'].transpose()
        G_weight = np.matmul(g_y_weight, G_y)
        G_bias = np.sum(G_y, axis=0)
        G_input = np.matmul(G_y, g_y_input)
        self.update_param(pm, 'w', G_weight)
        self.update_param(pm, 'b', G_bias)
        return G_input

    def update_param(self, pm, key, delta):
        if not self.use_adam:
            pm[key] -= self.learning_rate * delta
            return
        ro_1, ro_2, eps = 0.9, 0.999, 1.0e-8
        key_ro_1, key_ro_2 = 'm/' + key, 'v/' + key
        if key_ro_1 not in pm:
            pm[key_ro_1] = np.zeros(pm[key].shape)
            pm[key_ro_2] = np.zeros(pm[key].shape)
        pm[key_ro_1] = ro_1 * pm[key_ro_1] + (1 - ro_1) * delta
        pm[key_ro_2] = ro_2 * pm[key_ro_2] + (1 - ro_2) * delta ** 2
        m_hat = pm[key_ro_1] / (1 - ro_1)
        v_hat = pm[key_ro_2] / (1 - ro_2)
        pm[key] -= self.learning_rate * m_hat / (np.sqrt(v_hat) + eps)

## 2. abalone dataset 불러오기

In [65]:
import numpy as np
import time
import os
import csv
import copy    # chap 9
import wave    # chap 11
import cv2     # chap 12
import matplotlib.pyplot as plt

from PIL import Image
from IPython.core.display import HTML # chap 14
def relu(x):
    return np.maximum(x, 0)

def relu_derv(y):
    return np.sign(y)
def sigmoid(x):
    return np.exp(-relu(-x)) / (1.0 + np.exp(-np.abs(x)))

def sigmoid_derv(y):
    return y * (1 - y)

def sigmoid_cross_entropy_with_logits(z, x):
    return relu(x) - x * z + np.log(1 + np.exp(-np.abs(x)))

def sigmoid_cross_entropy_with_logits_derv(z, x):
    return -z + sigmoid(x)
def tanh(x):
    return 2 * sigmoid(2*x) - 1

def tanh_derv(y):
    return (1.0 + y) * (1.0 - y)
def softmax(x):
    max_elem = np.max(x, axis=1)
    diff = (x.transpose() - max_elem).transpose()
    exp = np.exp(diff)
    sum_exp = np.sum(exp, axis=1)
    probs = (exp.transpose() / sum_exp).transpose()
    return probs

def softmax_cross_entropy_with_logits(labels, logits):
    probs = softmax(logits)
    return -np.sum(labels * np.log(probs+1.0e-10), axis=1)

def softmax_cross_entropy_with_logits_derv(labels, logits):
    return softmax(logits) - labels
def load_csv(path, skip_header=True):
    with open(path) as csvfile:
        csvreader = csv.reader(csvfile)
        headers = None
        if skip_header: headers = next(csvreader, None)
        rows = []
        for row in csvreader:
            rows.append(row)

    return rows, headers
def onehot(xs, cnt):
    return np.eye(cnt)[np.array(xs).astype(int)]

def vector_to_str(x, fmt='%.2f', max_cnt=0):
    if max_cnt == 0 or len(x) <= max_cnt:
        return '[' + ','.join([fmt]*len(x)) % tuple(x) + ']'
    v = x[0:max_cnt]
    return '[' + ','.join([fmt]*len(v)) % tuple(v) + ',...]'
def load_image_pixels(imagepath, resolution, input_shape):
    img = Image.open(imagepath)
    resized = img.resize(resolution)
    return np.array(resized).reshape(input_shape)

def draw_images_horz(xs, image_shape=None):
    show_cnt = len(xs)
    fig, axes = plt.subplots(1, show_cnt, figsize=(5,5))
    for n in range(show_cnt):
        img = xs[n]
        if image_shape:
            x3d = img.reshape(image_shape)
            img = Image.fromarray(np.uint8(x3d))
        axes[n].imshow(img)
        axes[n].axis('off')
    plt.draw()
    plt.show()
def show_select_results(est, ans, target_names, max_cnt=0):
    for n in range(len(est)):
        pstr = vector_to_str(100*est[n], '%2.0f', max_cnt)
        estr = target_names[np.argmax(est[n])]
        astr = target_names[np.argmax(ans[n])]
        rstr = 'O'
        if estr != astr: rstr = 'X'
        print('추정확률분포 {} => 추정 {} : 정답 {} => {}'. \
              format(pstr, estr, astr, rstr))
def list_dir(path):
    filenames = os.listdir(path)
    filenames.sort()
    return filenames

In [66]:
class Dataset(object):
    def __init__(self, name, mode):
        self.name = name
        self.mode = mode

    def __str__(self):
        return '{}({}, {}+{}+{})'.format(self.name, self.mode, \
                   len(self.tr_xs), len(self.te_xs), len(self.va_xs))

    @property
    def train_count(self):
        return len(self.tr_xs)
def dataset_get_train_data(self, batch_size, nth):
    from_idx = nth * batch_size
    to_idx = (nth + 1) * batch_size

    tr_X = self.tr_xs[self.indices[from_idx:to_idx]]
    tr_Y = self.tr_ys[self.indices[from_idx:to_idx]]

    return tr_X, tr_Y


def dataset_shuffle_train_data(self, size):
    self.indices = np.arange(size)
    np.random.shuffle(self.indices)

Dataset.get_train_data = dataset_get_train_data
Dataset.shuffle_train_data = dataset_shuffle_train_data
def dataset_get_test_data(self):
    return self.te_xs, self.te_ys

Dataset.get_test_data = dataset_get_test_data
def dataset_get_validate_data(self, count):
    self.va_indices = np.arange(len(self.va_xs))
    np.random.shuffle(self.va_indices)

    va_X = self.va_xs[self.va_indices[0:count]]
    va_Y = self.va_ys[self.va_indices[0:count]]

    return va_X, va_Y

Dataset.get_validate_data = dataset_get_validate_data
Dataset.get_visualize_data = dataset_get_validate_data
def dataset_shuffle_data(self, xs, ys, tr_ratio=0.8, va_ratio=0.05):
    data_count = len(xs)

    tr_cnt = int(data_count * tr_ratio / 10) * 10
    va_cnt = int(data_count * va_ratio)
    te_cnt = data_count - (tr_cnt + va_cnt)

    tr_from, tr_to = 0, tr_cnt
    va_from, va_to = tr_cnt, tr_cnt + va_cnt
    te_from, te_to = tr_cnt + va_cnt, data_count

    indices = np.arange(data_count)
    np.random.shuffle(indices)

    self.tr_xs = xs[indices[tr_from:tr_to]]
    self.tr_ys = ys[indices[tr_from:tr_to]]
    self.va_xs = xs[indices[va_from:va_to]]
    self.va_ys = ys[indices[va_from:va_to]]
    self.te_xs = xs[indices[te_from:te_to]]
    self.te_ys = ys[indices[te_from:te_to]]

    self.input_shape = xs[0].shape
    self.output_shape = ys[0].shape

    return indices[tr_from:tr_to], indices[va_from:va_to], indices[te_from:te_to]

Dataset.shuffle_data = dataset_shuffle_data
def dataset_forward_postproc(self, output, y, mode=None):
    if mode is None: mode = self.mode

    if mode == 'regression':
        diff = output - y
        square = np.square(diff)
        loss = np.mean(square)
        aux = diff
    elif mode == 'binary':
        entropy = sigmoid_cross_entropy_with_logits(y, output)
        loss = np.mean(entropy)
        aux = [y, output]
    elif mode == 'select':
        entropy = softmax_cross_entropy_with_logits(y, output)
        loss = np.mean(entropy)
        aux = [output, y, entropy]

    return loss, aux

Dataset.forward_postproc = dataset_forward_postproc
def dataset_backprop_postproc(self, G_loss, aux, mode=None):
    if mode is None: mode = self.mode

    if mode == 'regression':
        diff = aux
        shape = diff.shape

        g_loss_square = np.ones(shape) / np.prod(shape)
        g_square_diff = 2 * diff
        g_diff_output = 1

        G_square = g_loss_square * G_loss
        G_diff = g_square_diff * G_square
        G_output = g_diff_output * G_diff
    elif mode == 'binary':
        y, output = aux
        shape = output.shape

        g_loss_entropy = np.ones(shape) / np.prod(shape)
        g_entropy_output = sigmoid_cross_entropy_with_logits_derv(y, output)

        G_entropy = g_loss_entropy * G_loss
        G_output = g_entropy_output * G_entropy
    elif mode == 'select':
        output, y, entropy = aux

        g_loss_entropy = 1.0 / np.prod(entropy.shape)
        g_entropy_output = softmax_cross_entropy_with_logits_derv(y, output)

        G_entropy = g_loss_entropy * G_loss
        G_output = g_entropy_output * G_entropy

    return G_output

Dataset.backprop_postproc = dataset_backprop_postproc
def dataset_eval_accuracy(self, x, y, output, mode=None):
    if mode is None: mode = self.mode

    if mode == 'regression':
        mse = np.mean(np.square(output - y))
        accuracy = 1 - np.sqrt(mse) / np.mean(y)
    elif mode == 'binary':
        estimate = np.greater(output, 0)
        answer = np.equal(y, 1.0)
        correct = np.equal(estimate, answer)
        accuracy = np.mean(correct)
    elif mode == 'select':
        estimate = np.argmax(output, axis=1)
        answer = np.argmax(y, axis=1)
        correct = np.equal(estimate, answer)
        accuracy = np.mean(correct)

    return accuracy

Dataset.eval_accuracy = dataset_eval_accuracy
def dataset_get_estimate(self, output, mode=None):
    if mode is None: mode = self.mode

    if mode == 'regression':
        estimate = output
    elif mode == 'binary':
        estimate = sigmoid(output)
    elif mode == 'select':
        estimate = softmax(output)

    return estimate

Dataset.get_estimate = dataset_get_estimate
def dataset_train_prt_result(self, epoch, costs, accs, acc, time1, time2):
    print('    Epoch {}: cost={:5.3f}, accuracy={:5.3f}/{:5.3f} ({}/{} secs)'. \
          format(epoch, np.mean(costs), np.mean(accs), acc, time1, time2))

def dataset_test_prt_result(self, name, acc, time):
    print('Model {} test report: accuracy = {:5.3f}, ({} secs)\n'. \
          format(name, acc, time))

Dataset.train_prt_result = dataset_train_prt_result
Dataset.test_prt_result = dataset_test_prt_result

In [67]:
class AbaloneDataset(Dataset):
    def __init__(self):
        super(AbaloneDataset, self).__init__('abalone', 'regression')

        rows, _ = load_csv('/content/sample_data/abalone.csv')

        xs = np.zeros([len(rows), 10])
        ys = np.zeros([len(rows), 1])

        for n, row in enumerate(rows):
            if row[0] == 'I': xs[n, 0] = 1
            if row[0] == 'M': xs[n, 1] = 1
            if row[0] == 'F': xs[n, 2] = 1
            xs[n, 3:] = row[1:-1]
            ys[n, :] = row[-1:]

        self.shuffle_data(xs, ys, 0.8)

    def visualize(self, xs, estimates, answers):
        for n in range(len(xs)):
            x, est, ans = xs[n], estimates[n], answers[n]
            xstr = vector_to_str(x, '%4.2f')
            print('{} => 추정 {:4.1f} : 정답 {:4.1f}'.
                  format(xstr, est[0], ans[0]))

## 3. Adam 모델

In [68]:
class AdamModel(MlpModel):
    def __init__(self, name, dataset, hconfigs):
        self.use_adam = True
        super(AdamModel, self).__init__(name, dataset, hconfigs)
def adam_backprop_layer(self, G_y, hconfig, pm, aux):
    x, y = aux

    if hconfig is not None: G_y = relu_derv(y) * G_y

    g_y_weight = x.transpose()
    g_y_input = pm['w'].transpose()

    G_weight = np.matmul(g_y_weight, G_y)
    G_bias = np.sum(G_y, axis=0)
    G_input = np.matmul(G_y, g_y_input)

    self.update_param(pm, 'w',  G_weight)
    self.update_param(pm, 'b',  G_bias)

    return G_input

AdamModel.backprop_layer = adam_backprop_layer
def adam_update_param(self, pm, key, delta):
    if self.use_adam:
        delta = self.eval_adam_delta(pm, key, delta)

    pm[key] -= self.learning_rate * delta

AdamModel.update_param = adam_update_param
def adam_eval_adam_delta(self, pm, key, delta):
    ro_1 = 0.9
    ro_2 = 0.999
    epsilon = 1.0e-8

    skey, tkey, step = 's' + key, 't' + key, 'n' + key
    if skey not in pm:
        pm[skey] = np.zeros(pm[key].shape)
        pm[tkey] = np.zeros(pm[key].shape)
        pm[step] = 0

    s = pm[skey] = ro_1 * pm[skey] + (1 - ro_1) * delta
    t = pm[tkey] = ro_2 * pm[tkey] + (1 - ro_2) * (delta * delta)

    pm[step] += 1
    s = s / (1 - np.power(ro_1, pm[step]))
    t = t / (1 - np.power(ro_2, pm[step]))

    return s / (np.sqrt(t)+epsilon)

AdamModel.eval_adam_delta = adam_eval_adam_delta

## 4. 아다그라드 모델

In [75]:
import numpy as np

# AdagradModel 클래스는 MlpModel 클래스를 상속받아 Adagrad 알고리즘을 사용하도록 확장
class AdagradModel(MlpModel):
    def __init__(self, name, dataset, hconfigs):
        self.use_adagrad = True  # Adagrad 알고리즘 사용 여부를 설정
        super(AdagradModel, self).__init__(name, dataset, hconfigs)  # 부모 클래스의 초기화 메서드를 호출

# adagrad_backprop_layer 함수는 역전파 단계에서 각 층의 기울기를 계산하고 업데이트
def adagrad_backprop_layer(self, G_y, hconfig, pm, aux):
    x, y = aux

    if hconfig is not None:
        G_y = relu_derv(y) * G_y  # 활성화 함수의 기울기를 곱하여 최종 기울기를 계산

    g_y_weight = x.transpose()  # 입력 데이터의 전치 행렬
    g_y_input = pm['w'].transpose()  # 가중치 행렬의 전치 행렬

    G_weight = np.matmul(g_y_weight, G_y)  # 가중치에 대한 기울기 계산
    G_bias = np.sum(G_y, axis=0)  # 편향에 대한 기울기 계산
    G_input = np.matmul(G_y, g_y_input)  # 입력에 대한 기울기 계산

    self.update_param(pm, 'w', G_weight)  # 가중치 파라미터 업데이트
    self.update_param(pm, 'b', G_bias)  # 편향 파라미터 업데이트

    return G_input  # 이전 층으로 전달할 기울기 반환

# AdagradModel 클래스의 backprop_layer 메서드를 adagrad_backprop_layer 함수로 설정
AdagradModel.backprop_layer = adagrad_backprop_layer

# adagrad_update_param 함수는 파라미터 업데이트
def adagrad_update_param(self, pm, key, delta):
    if self.use_adagrad:
        delta = self.eval_adagrad_delta(pm, key, delta)  # Adagrad 알고리즘을 적용하여 delta 값 조정

    pm[key] -= self.learning_rate * delta  # 학습률을 곱한 delta 값을 사용하여 파라미터 업데이트

# AdagradModel 클래스의 update_param 메서드를 adagrad_update_param 함수로 설정
AdagradModel.update_param = adagrad_update_param

# adagrad_eval_adagrad_delta 함수는 Adagrad 알고리즘을 적용하여 기울기(delta)를 조정
def adagrad_eval_adagrad_delta(self, pm, key, delta):
    epsilon = 1.0e-8  # 분모가 0이 되는 것을 방지하기 위한 작은 값

    grad_squared_key = 'gs' + key  # 기울기 제곱합을 저장할 키 생성
    if grad_squared_key not in pm:
        pm[grad_squared_key] = np.zeros(pm[key].shape)  # 기울기 제곱합이 저장되지 않은 경우 0으로 초기화

    pm[grad_squared_key] += delta * delta  # 기울기의 제곱을 기울기 제곱합에 누적

    adjusted_delta = delta / (np.sqrt(pm[grad_squared_key]) + epsilon)  # 기울기 제곱합의 제곱근을 계산하여 delta 값을 조정
    return adjusted_delta  # 조정된 delta 값 반환

# AdagradModel 클래스의 eval_adagrad_delta 메서드를 adagrad_eval_adagrad_delta 함수로 설정
AdagradModel.eval_adagrad_delta = adagrad_eval_adagrad_delta

## 5. 알엠에스프롭

In [76]:
import numpy as np

# RMSPropModel 클래스는 MlpModel 클래스를 상속받아 RMSProp 알고리즘을 사용하도록 확장
class RMSPropModel(MlpModel):
    def __init__(self, name, dataset, hconfigs):
        self.use_rmsprop = True  # RMSProp 알고리즘 사용 여부를 설정
        super(RMSPropModel, self).__init__(name, dataset, hconfigs)  # 부모 클래스의 초기화 메서드를 호출

# rmsprop_backprop_layer 함수는 역전파 단계에서 각 층의 기울기를 계산하고 업데이트
def rmsprop_backprop_layer(self, G_y, hconfig, pm, aux):
    x, y = aux

    if hconfig is not None:
        G_y = relu_derv(y) * G_y  # 활성화 함수의 기울기를 곱하여 최종 기울기를 계산

    g_y_weight = x.transpose()  # 입력 데이터의 전치 행렬
    g_y_input = pm['w'].transpose()  # 가중치 행렬의 전치 행렬

    G_weight = np.matmul(g_y_weight, G_y)  # 가중치에 대한 기울기 계산
    G_bias = np.sum(G_y, axis=0)  # 편향에 대한 기울기 계산
    G_input = np.matmul(G_y, g_y_input)  # 입력에 대한 기울기 계산

    self.update_param(pm, 'w', G_weight)  # 가중치 파라미터 업데이트
    self.update_param(pm, 'b', G_bias)  # 편향 파라미터 업데이트

    return G_input  # 이전 층으로 전달할 기울기 반환

# RMSPropModel 클래스의 backprop_layer 메서드를 rmsprop_backprop_layer 함수로 설정
RMSPropModel.backprop_layer = rmsprop_backprop_layer

# rmsprop_update_param 함수는 파라미터 업데이트
def rmsprop_update_param(self, pm, key, delta):
    if self.use_rmsprop:
        delta = self.eval_rmsprop_delta(pm, key, delta)  # RMSProp 알고리즘을 적용하여 delta 값 조정

    pm[key] -= self.learning_rate * delta  # 학습률을 곱한 delta 값을 사용하여 파라미터 업데이트

# RMSPropModel 클래스의 update_param 메서드를 rmsprop_update_param 함수로 설정
RMSPropModel.update_param = rmsprop_update_param

# rmsprop_eval_rmsprop_delta 함수는 RMSProp 알고리즘을 적용하여 기울기(delta)를 조정
def rmsprop_eval_rmsprop_delta(self, pm, key, delta):
    ro = 0.9  # 감쇠율
    epsilon = 1.0e-8  # 분모가 0이 되는 것을 방지하기 위한 작은 값

    mean_squared_key = 'ms' + key  # 기울기 제곱의 지수 가중 이동 평균을 저장할 키 생성
    if mean_squared_key not in pm:
        pm[mean_squared_key] = np.zeros(pm[key].shape)  # 기울기 제곱의 지수 가중 이동 평균이 저장되지 않은 경우 0으로 초기화

    # 기울기 제곱의 지수 가중 이동 평균 업데이트
    pm[mean_squared_key] = ro * pm[mean_squared_key] + (1 - ro) * (delta * delta)

    # 기울기 제곱의 지수 가중 이동 평균의 제곱근을 계산하여 delta 값을 조정
    adjusted_delta = delta / (np.sqrt(pm[mean_squared_key]) + epsilon)
    return adjusted_delta  # 조정된 delta 값 반환

# RMSPropModel 클래스의 eval_rmsprop_delta 메서드를 rmsprop_eval_rmsprop_delta 함수로 설정
RMSPropModel.eval_rmsprop_delta = rmsprop_eval_rmsprop_delta

## 6. 모멘텀

In [77]:
import numpy as np

class MomentumModel(MlpModel):
    def __init__(self, name, dataset, hconfigs):
        self.use_momentum = True
        self.momentum_rate = 0.9  # 모멘텀 계수 설정
        super(MomentumModel, self).__init__(name, dataset, hconfigs)

def momentum_backprop_layer(self, G_y, hconfig, pm, aux):
    x, y = aux

    if hconfig is not None:
        G_y = relu_derv(y) * G_y

    g_y_weight = x.transpose()
    g_y_input = pm['w'].transpose()

    G_weight = np.matmul(g_y_weight, G_y)
    G_bias = np.sum(G_y, axis=0)
    G_input = np.matmul(G_y, g_y_input)

    self.update_param(pm, 'w', G_weight)
    self.update_param(pm, 'b', G_bias)

    return G_input

MomentumModel.backprop_layer = momentum_backprop_layer

def momentum_update_param(self, pm, key, delta):
    if self.use_momentum:
        delta = self.eval_momentum_delta(pm, key, delta)

    pm[key] -= self.learning_rate * delta

MomentumModel.update_param = momentum_update_param

def momentum_eval_momentum_delta(self, pm, key, delta):
    vkey = 'v' + key

    if vkey not in pm:
        pm[vkey] = np.zeros(pm[key].shape)

    pm[vkey] = self.momentum_rate * pm[vkey] + delta

    return pm[vkey]

MomentumModel.eval_momentum_delta = momentum_eval_momentum_delta


## 7. 네스테로프 모멘텀

In [78]:
import numpy as np

class NesterovMomentumModel(MlpModel):
    def __init__(self, name, dataset, hconfigs):
        self.use_nesterov_momentum = True
        self.momentum_rate = 0.9  # 모멘텀 계수 설정
        super(NesterovMomentumModel, self).__init__(name, dataset, hconfigs)

def nesterov_backprop_layer(self, G_y, hconfig, pm, aux):
    x, y = aux

    if hconfig is not None:
        G_y = relu_derv(y) * G_y

    g_y_weight = x.transpose()
    g_y_input = pm['w'].transpose()

    G_weight = np.matmul(g_y_weight, G_y)
    G_bias = np.sum(G_y, axis=0)
    G_input = np.matmul(G_y, g_y_input)

    self.update_param(pm, 'w', G_weight)
    self.update_param(pm, 'b', G_bias)

    return G_input

NesterovMomentumModel.backprop_layer = nesterov_backprop_layer

def nesterov_update_param(self, pm, key, delta):
    if self.use_nesterov_momentum:
        delta = self.eval_nesterov_momentum_delta(pm, key, delta)

    pm[key] -= self.learning_rate * delta

NesterovMomentumModel.update_param = nesterov_update_param

def nesterov_eval_nesterov_momentum_delta(self, pm, key, delta):
    vkey = 'v' + key

    if vkey not in pm:
        pm[vkey] = np.zeros(pm[key].shape)

    v_prev = pm[vkey]
    pm[vkey] = self.momentum_rate * pm[vkey] + delta
    nesterov_delta = self.momentum_rate * pm[vkey] + (1 + self.momentum_rate) * delta

    return nesterov_delta

NesterovMomentumModel.eval_nesterov_momentum_delta = nesterov_eval_nesterov_momentum_delta


## 8. 아다델타

In [79]:
import numpy as np

class AdadeltaModel(MlpModel):
    def __init__(self, name, dataset, hconfigs):
        self.use_adadelta = True
        super(AdadeltaModel, self).__init__(name, dataset, hconfigs)

def adadelta_backprop_layer(self, G_y, hconfig, pm, aux):
    x, y = aux

    if hconfig is not None:
        G_y = relu_derv(y) * G_y

    g_y_weight = x.transpose()
    g_y_input = pm['w'].transpose()

    G_weight = np.matmul(g_y_weight, G_y)
    G_bias = np.sum(G_y, axis=0)
    G_input = np.matmul(G_y, g_y_input)

    self.update_param(pm, 'w', G_weight)
    self.update_param(pm, 'b', G_bias)

    return G_input

AdadeltaModel.backprop_layer = adadelta_backprop_layer

def adadelta_update_param(self, pm, key, delta):
    if self.use_adadelta:
        delta = self.eval_adadelta_delta(pm, key, delta)

    pm[key] -= delta

AdadeltaModel.update_param = adadelta_update_param

def adadelta_eval_adadelta_delta(self, pm, key, delta):
    ro = 0.95
    epsilon = 1e-6

    skey, tkey, step = 's' + key, 't' + key, 'n' + key

    if skey not in pm:
        pm[skey] = np.zeros(pm[key].shape)
        pm[tkey] = np.zeros(pm[key].shape)

    pm[skey] = ro * pm[skey] + (1 - ro) * np.square(delta)
    update = np.sqrt((pm[tkey] + epsilon) / (pm[skey] + epsilon)) * delta
    pm[tkey] = ro * pm[tkey] + (1 - ro) * np.square(update)

    return update

AdadeltaModel.eval_adadelta_delta = adadelta_eval_adadelta_delta


## 6. MLP 실행

In [83]:
ad = AbaloneDataset()
am = AdamModel('abalone_model', ad, [200,100,50])
am.exec_all(epoch_count=150, report=2)

Model abalone_model train started:
    Epoch 2: cost=7.038, accuracy=0.751/0.729 (1/1 secs)
    Epoch 4: cost=6.509, accuracy=0.761/0.750 (0/1 secs)
    Epoch 6: cost=6.208, accuracy=0.768/0.766 (0/1 secs)
    Epoch 8: cost=5.831, accuracy=0.774/0.729 (0/1 secs)
    Epoch 10: cost=5.520, accuracy=0.779/0.748 (0/1 secs)
    Epoch 12: cost=5.282, accuracy=0.784/0.784 (0/1 secs)
    Epoch 14: cost=5.140, accuracy=0.787/0.762 (1/2 secs)
    Epoch 16: cost=5.086, accuracy=0.787/0.775 (0/2 secs)
    Epoch 18: cost=5.041, accuracy=0.789/0.775 (0/2 secs)
    Epoch 20: cost=5.010, accuracy=0.788/0.781 (0/2 secs)
    Epoch 22: cost=5.019, accuracy=0.788/0.777 (0/2 secs)
    Epoch 24: cost=5.024, accuracy=0.789/0.790 (0/2 secs)
    Epoch 26: cost=4.970, accuracy=0.789/0.777 (1/3 secs)
    Epoch 28: cost=4.960, accuracy=0.789/0.780 (0/3 secs)
    Epoch 30: cost=4.993, accuracy=0.790/0.754 (0/3 secs)
    Epoch 32: cost=4.964, accuracy=0.789/0.745 (0/3 secs)
    Epoch 34: cost=4.962, accuracy=0.789/

In [80]:
ad = AbaloneDataset()
am = AdagradModel('abalone_model', ad, [4])
am.exec_all(epoch_count=150, report=2)

Model abalone_model train started:
    Epoch 2: cost=49.318, accuracy=0.308/0.370 (0/0 secs)
    Epoch 4: cost=17.877, accuracy=0.596/0.603 (0/0 secs)
    Epoch 6: cost=9.501, accuracy=0.716/0.725 (0/0 secs)
    Epoch 8: cost=7.826, accuracy=0.740/0.691 (0/0 secs)
    Epoch 10: cost=7.526, accuracy=0.745/0.699 (1/1 secs)
    Epoch 12: cost=7.457, accuracy=0.744/0.769 (0/1 secs)
    Epoch 14: cost=7.427, accuracy=0.745/0.745 (0/1 secs)
    Epoch 16: cost=7.403, accuracy=0.745/0.772 (0/1 secs)
    Epoch 18: cost=7.381, accuracy=0.745/0.733 (0/1 secs)
    Epoch 20: cost=7.360, accuracy=0.745/0.746 (0/1 secs)
    Epoch 22: cost=7.340, accuracy=0.744/0.734 (1/2 secs)
    Epoch 24: cost=7.320, accuracy=0.745/0.708 (0/2 secs)
    Epoch 26: cost=7.302, accuracy=0.748/0.739 (0/2 secs)
    Epoch 28: cost=7.284, accuracy=0.747/0.728 (0/2 secs)
    Epoch 30: cost=7.267, accuracy=0.747/0.735 (0/2 secs)
    Epoch 32: cost=7.250, accuracy=0.746/0.715 (0/2 secs)
    Epoch 34: cost=7.234, accuracy=0.74

In [81]:
ad = AbaloneDataset()
am = RMSPropModel('abalone_model', ad, [4])
am.exec_all(epoch_count=150, report=2)

Model abalone_model train started:
    Epoch 2: cost=35.716, accuracy=0.423/0.509 (1/1 secs)
    Epoch 4: cost=10.534, accuracy=0.691/0.692 (0/1 secs)
    Epoch 6: cost=10.467, accuracy=0.690/0.710 (0/1 secs)
    Epoch 8: cost=10.465, accuracy=0.690/0.685 (0/1 secs)
    Epoch 10: cost=10.463, accuracy=0.691/0.707 (0/1 secs)
    Epoch 12: cost=10.467, accuracy=0.692/0.679 (0/1 secs)
    Epoch 14: cost=10.462, accuracy=0.691/0.694 (0/1 secs)
    Epoch 16: cost=10.463, accuracy=0.690/0.715 (0/1 secs)
    Epoch 18: cost=10.464, accuracy=0.689/0.700 (1/2 secs)
    Epoch 20: cost=10.466, accuracy=0.689/0.713 (0/2 secs)
    Epoch 22: cost=10.466, accuracy=0.690/0.713 (0/2 secs)
    Epoch 24: cost=10.464, accuracy=0.687/0.735 (0/2 secs)
    Epoch 26: cost=10.463, accuracy=0.690/0.678 (0/2 secs)
    Epoch 28: cost=10.464, accuracy=0.691/0.690 (0/2 secs)
    Epoch 30: cost=10.465, accuracy=0.690/0.704 (1/3 secs)
    Epoch 32: cost=10.466, accuracy=0.690/0.685 (0/3 secs)
    Epoch 34: cost=10.459

In [82]:
ad = AbaloneDataset()
am = MomentumModel('abalone_model', ad, [4])
am.exec_all(epoch_count=150, report=2)

Model abalone_model train started:
    Epoch 2: cost=10.577, accuracy=0.690/0.677 (0/0 secs)
    Epoch 4: cost=10.523, accuracy=0.688/0.634 (0/0 secs)
    Epoch 6: cost=10.600, accuracy=0.689/0.659 (0/0 secs)
    Epoch 8: cost=10.565, accuracy=0.688/0.682 (1/1 secs)
    Epoch 10: cost=10.561, accuracy=0.688/0.701 (0/1 secs)
    Epoch 12: cost=10.515, accuracy=0.689/0.636 (0/1 secs)
    Epoch 14: cost=10.589, accuracy=0.689/0.632 (0/1 secs)
    Epoch 16: cost=10.522, accuracy=0.688/0.688 (0/1 secs)
    Epoch 18: cost=10.567, accuracy=0.690/0.650 (0/1 secs)
    Epoch 20: cost=10.550, accuracy=0.689/0.640 (0/1 secs)
    Epoch 22: cost=10.538, accuracy=0.691/0.677 (0/1 secs)
    Epoch 24: cost=10.567, accuracy=0.689/0.647 (0/1 secs)
    Epoch 26: cost=10.544, accuracy=0.688/0.697 (0/1 secs)
    Epoch 28: cost=10.544, accuracy=0.691/0.712 (1/2 secs)
    Epoch 30: cost=10.565, accuracy=0.691/0.637 (0/2 secs)
    Epoch 32: cost=10.568, accuracy=0.689/0.684 (0/2 secs)
    Epoch 34: cost=10.558

In [84]:
ad = AbaloneDataset()
am = NesterovMomentumModel('NesterovMomentumModel', ad, [4])
am.exec_all(epoch_count=150, report=2)

Model NesterovMomentumModel train started:
    Epoch 2: cost=5.910, accuracy=0.771/0.798 (0/0 secs)
    Epoch 4: cost=5.488, accuracy=0.780/0.766 (0/0 secs)
    Epoch 6: cost=5.588, accuracy=0.778/0.764 (0/0 secs)
    Epoch 8: cost=5.428, accuracy=0.780/0.775 (1/1 secs)
    Epoch 10: cost=5.201, accuracy=0.785/0.745 (0/1 secs)
    Epoch 12: cost=5.062, accuracy=0.790/0.755 (0/1 secs)
    Epoch 14: cost=5.318, accuracy=0.784/0.730 (0/1 secs)
    Epoch 16: cost=5.277, accuracy=0.784/0.758 (0/1 secs)
    Epoch 18: cost=5.129, accuracy=0.787/0.790 (0/1 secs)
    Epoch 20: cost=5.797, accuracy=0.774/0.788 (0/1 secs)
    Epoch 22: cost=5.084, accuracy=0.787/0.756 (0/1 secs)
    Epoch 24: cost=5.167, accuracy=0.787/0.788 (1/2 secs)
    Epoch 26: cost=5.253, accuracy=0.785/0.794 (0/2 secs)
    Epoch 28: cost=5.070, accuracy=0.788/0.805 (0/2 secs)
    Epoch 30: cost=5.009, accuracy=0.789/0.782 (0/2 secs)
    Epoch 32: cost=5.024, accuracy=0.788/0.790 (0/2 secs)
    Epoch 34: cost=5.047, accurac

In [85]:
ad = AbaloneDataset()
am = AdadeltaModel('AdadeltaModel', ad, [4])
am.exec_all(epoch_count=150, report=2)

Model AdadeltaModel train started:
    Epoch 2: cost=6.623, accuracy=0.760/0.760 (0/0 secs)
    Epoch 4: cost=6.065, accuracy=0.771/0.793 (0/0 secs)
    Epoch 6: cost=5.611, accuracy=0.779/0.770 (1/1 secs)
    Epoch 8: cost=5.356, accuracy=0.785/0.809 (0/1 secs)
    Epoch 10: cost=5.190, accuracy=0.789/0.802 (0/1 secs)
    Epoch 12: cost=5.105, accuracy=0.787/0.820 (0/1 secs)
    Epoch 14: cost=5.055, accuracy=0.789/0.779 (0/1 secs)
    Epoch 16: cost=5.017, accuracy=0.789/0.797 (1/2 secs)
    Epoch 18: cost=5.016, accuracy=0.788/0.806 (0/2 secs)
    Epoch 20: cost=4.995, accuracy=0.789/0.786 (0/2 secs)
    Epoch 22: cost=4.996, accuracy=0.790/0.803 (0/2 secs)
    Epoch 24: cost=4.993, accuracy=0.790/0.787 (0/2 secs)
    Epoch 26: cost=4.983, accuracy=0.791/0.812 (1/3 secs)
    Epoch 28: cost=4.976, accuracy=0.789/0.785 (0/3 secs)
    Epoch 30: cost=4.977, accuracy=0.789/0.803 (0/3 secs)
    Epoch 32: cost=4.977, accuracy=0.790/0.786 (0/3 secs)
    Epoch 34: cost=4.961, accuracy=0.792/