In [1]:
# 필요한 모듈 불러오기
import pandas as pd
import numpy as np
import csv
import time

# Fix NumPy's global random seed to 1234 so the random number sequence is repeatable.
np.random.seed(1234)

# 고정되지 않는 난수를 발생하기 위해서 randomize함수를 이용해 현재시간을 기준으로 난수를 발생하는 함수
def randomize():
    """Re-seed NumPy's global random generator from the current wall-clock time.

    Calling this replaces whatever seed was set before, so subsequent random
    draws differ from run to run.
    """
    # time.time must be *called*: passing the function object itself makes
    # np.random.seed raise TypeError. The value is truncated to an integer and
    # reduced modulo 2**32 because np.random.seed only accepts integer seeds
    # in the range [0, 2**32 - 1].
    np.random.seed(int(time.time()) % (2 ** 32))

In [2]:
# Mean and standard deviation of the normal distribution used to draw the
# initial weight values (see alloc_param_pair).
RND_MEAN = 0
RND_STD = 0.0030

# Learning rate (hyperparameter) applied to every parameter update in backprop.
LEARNING_RATE = 0.001

In [3]:
# 전체 함수 실행 함수
def pulsar_exec(epoch_count=10, mb_size=10, report=1):
    """Run the full pipeline: load the dataset, build the model, train and test.

    Args:
        epoch_count: number of training epochs, forwarded to train_and_test.
        mb_size: minibatch size, forwarded to train_and_test.
        report: reporting interval in epochs, forwarded to train_and_test.
    """
    load_pulsar_dataset()
    init_model()
    train_and_test(epoch_count=epoch_count, mb_size=mb_size, report=report)

In [4]:
# 딥러닝에 활용할 데이터 불러오기 및 정제 과정
def load_pulsar_dataset():
    """Read 'Pulsar.csv' into the global ``data`` array and set the layer sizes.

    The first line of the file is skipped as a header. Every remaining
    non-blank row is converted to float32.

    Side effects:
        Sets the globals ``data`` (2-D float32 array of the CSV rows),
        ``input_cnt`` (8) and ``output_cnt`` (1).

    Raises:
        OSError: if 'Pulsar.csv' cannot be opened.
        ValueError: if a field cannot be converted to float32.
    """
    global data, input_cnt, output_cnt

    # newline='' is what the csv module documentation requires for files
    # handed to csv.reader, so that line endings are handled by the reader.
    with open('Pulsar.csv', newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)  # skip the header line (no error on an empty file)
        # csv.reader yields an empty list for a blank line; dropping those keeps
        # the table rectangular so np.asarray does not fail on a ragged input.
        rows = [row for row in csvreader if row]

    input_cnt, output_cnt = 8, 1
    data = np.asarray(rows, dtype='float32')

In [5]:
# NOTE: a `global` statement at module level has no effect. In the code visible
# here, hidden_config is only assigned by set_hidden(), so set_hidden() has to
# be called before init_model() reads it.
global hidden_config

def init_model():
    """Allocate the model parameters for the currently selected architecture.

    When the global ``hidden_config`` is None the single-hidden-layer
    initialiser is used; otherwise the multi-layer initialiser is used.
    A message describing the choice is printed first.
    """
    if hidden_config is None:
        print('은닉 계층 하나를 갖는 다층 퍼셉트론이 작동되었습니다.')
        init_model_hidden1()
        return

    print('은닉 계층 {}개를 갖는 다층 퍼셉트론이 작동되었습니다.'.format(len(hidden_config)))
    init_model_hiddens()

def init_model_hidden1():
    """Allocate parameters for a network with exactly one hidden layer.

    Reads the globals ``input_cnt``, ``hidden_cnt`` and ``output_cnt`` and
    stores the new parameter pairs in the globals ``pm_hidden`` and
    ``pm_output``. The hidden layer is allocated before the output layer.
    """
    global pm_output, pm_hidden, input_cnt, output_cnt, hidden_cnt

    hidden_shape = [input_cnt, hidden_cnt]
    output_shape = [hidden_cnt, output_cnt]

    pm_hidden = alloc_param_pair(hidden_shape)
    pm_output = alloc_param_pair(output_shape)

def init_model_hiddens():
    """Allocate parameters for a network with the hidden layers in ``hidden_config``.

    Builds one parameter pair per entry of the global ``hidden_config``,
    chaining the sizes from ``input_cnt`` onward, and stores them in the
    global list ``pm_hiddens``. The output layer pair, fed by the last hidden
    layer (or by the input if there are no hidden layers), goes to ``pm_output``.
    """
    global pm_output, pm_hiddens, input_cnt, output_cnt, hidden_config

    # Widths of consecutive layers: the input followed by every hidden layer.
    layer_sizes = [input_cnt, *hidden_config]

    pm_hiddens = [
        alloc_param_pair([fan_in, fan_out])
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:])
    ]

    pm_output = alloc_param_pair([layer_sizes[-1], output_cnt])
    
def alloc_param_pair(shape):
    """Create a weight/bias pair for one layer.

    Args:
        shape: sequence of dimensions for the weight array; its last entry is
            also the length of the bias vector.

    Returns:
        dict with 'w' (normal random values drawn with RND_MEAN / RND_STD)
        and 'b' (zeros).
    """
    params = {}
    params['w'] = np.random.normal(RND_MEAN, RND_STD, shape)
    params['b'] = np.zeros(shape[-1])
    return params

In [6]:
def train_and_test(epoch_count, mb_size, report):
    """Train for ``epoch_count`` epochs and report accuracy on the test split.

    Args:
        epoch_count: number of passes over the training minibatches.
        mb_size: minibatch size.
        report: when positive, a progress line is printed every ``report``
            epochs; a value of 0 or less disables the periodic report.

    A final test accuracy line is always printed after training.
    """
    step_count = arrange_data(mb_size)
    test_x, test_y = get_test_data()

    for epoch_no in range(1, epoch_count + 1):
        epoch_losses = []
        epoch_accs = []

        for step in range(step_count):
            batch_x, batch_y = get_train_data(mb_size, step)
            step_loss, step_acc = run_train(batch_x, batch_y)
            epoch_losses.append(step_loss)
            epoch_accs.append(step_acc)

        should_report = report > 0 and epoch_no % report == 0
        if should_report:
            test_acc = run_test(test_x, test_y)
            # Printed as: mean training loss, mean training accuracy / test accuracy.
            message = 'Epoch {}: loss ={:5.3f}, accuracy={:5.3f}/{:5.3f}'.format(
                epoch_no, np.mean(epoch_losses), np.mean(epoch_accs), test_acc)
            print(message)

    final_acc = run_test(test_x, test_y)
    print('\nFinal Test: final accuracy = {:5.3f}'.format(final_acc))

def arrange_data(mb_size):
    """Shuffle the row order and decide where the test split begins.

    Creates the global ``shuffle_map`` (a shuffled array of row indices of
    ``data``) and the global ``test_begin_idx`` (first position in
    ``shuffle_map`` that belongs to the test split).

    Args:
        mb_size: minibatch size.

    Returns:
        Number of whole minibatches that fit in 80% of the rows.
    """
    global data, shuffle_map, test_begin_idx

    sample_cnt = data.shape[0]
    shuffle_map = np.arange(sample_cnt)
    np.random.shuffle(shuffle_map)

    train_cnt = int(sample_cnt * 0.8)
    step_count = train_cnt // mb_size
    # Rows left over after the last whole minibatch fall into the test split.
    test_begin_idx = step_count * mb_size
    return step_count

def get_train_data(mb_size, nth):
    """Return the inputs and targets of the ``nth`` training minibatch.

    On the first minibatch of an epoch (``nth == 0``) the training part of
    the global ``shuffle_map`` is reshuffled in place.

    Args:
        mb_size: minibatch size.
        nth: zero-based index of the minibatch.

    Returns:
        (inputs, targets): all but the last ``output_cnt`` columns, and the
        last ``output_cnt`` columns, of the selected rows of ``data``.
    """
    global data, shuffle_map, test_begin_idx, output_cnt

    if nth == 0:
        # The slice is a view, so shuffling it reorders shuffle_map itself.
        train_part = shuffle_map[:test_begin_idx]
        np.random.shuffle(train_part)

    first = mb_size * nth
    batch_rows = shuffle_map[first:first + mb_size]
    batch = data[batch_rows]

    inputs = batch[:, :-output_cnt]
    targets = batch[:, -output_cnt:]
    return inputs, targets

def get_test_data():
    """Return the inputs and targets of the test split.

    The test split is every row of ``data`` indexed by ``shuffle_map`` from
    position ``test_begin_idx`` onward.

    Returns:
        (inputs, targets): all but the last ``output_cnt`` columns, and the
        last ``output_cnt`` columns, of those rows.
    """
    global data, shuffle_map, test_begin_idx, output_cnt

    test_rows = shuffle_map[test_begin_idx:]
    held_out = data[test_rows]

    inputs = held_out[:, :-output_cnt]
    targets = held_out[:, -output_cnt:]
    return inputs, targets

def run_train(x, y):
    """Run one training step on a minibatch and update the parameters.

    Args:
        x: minibatch inputs.
        y: minibatch targets.

    Returns:
        (loss, accuracy) measured on the forward pass, i.e. before the
        parameter update performed by this call.
    """
    # Forward pass through the network and the loss post-processing.
    output, aux_nn = forward_neuralnet(x)
    loss, aux_pp = forward_postproc(output, y)
    accuracy = eval_accuracy(output, y)

    # Backward pass, started with a loss gradient of 1.0.
    G_output = backprop_postproc(1.0, aux_pp)
    backprop_neuralnet(G_output, aux_nn)

    return loss, accuracy

def run_test(x, y):
    """Evaluate accuracy on the given inputs without updating any parameter.

    Args:
        x: inputs.
        y: targets.

    Returns:
        Accuracy as computed by eval_accuracy on the network output.
    """
    # Only the network output is needed; the auxiliary forward data is dropped.
    output = forward_neuralnet(x)[0]
    return eval_accuracy(output, y)

In [7]:
def set_hidden(info):
    """Select the hidden-layer architecture used by the model functions.

    Args:
        info: an int selects a single hidden layer of that width
            (``hidden_cnt`` is set and ``hidden_config`` becomes None);
            any other value is stored as ``hidden_config`` and
            ``hidden_cnt`` is left untouched.
    """
    global hidden_cnt, hidden_config

    if not isinstance(info, int):
        hidden_config = info
        return

    hidden_cnt, hidden_config = info, None

In [None]:
def forward_neuralnet(x):
    """Run the forward pass for the currently selected architecture.

    Dispatches to the single-hidden-layer version when the global
    ``hidden_config`` is None and to the multi-layer version otherwise.

    Returns:
        Whatever the selected implementation returns: (output, aux).
    """
    if hidden_config is None:
        return forward_neuralnet_hidden1(x)
    return forward_neuralnet_hiddens(x)

def forward_neuralnet_hidden1(x):
    """Forward pass through one ReLU hidden layer followed by a linear output layer.

    Args:
        x: input batch, multiplied on the right by ``pm_hidden['w']``.

    Returns:
        (output, [x, hidden]): the linear output and the values needed later
        by the backward pass.
    """
    global pm_output, pm_hidden

    hidden_pre = x @ pm_hidden['w'] + pm_hidden['b']
    hidden = relu(hidden_pre)

    output = hidden @ pm_output['w'] + pm_output['b']
    return output, [x, hidden]

def forward_neuralnet_hiddens(x):
    """Forward pass through every hidden layer in ``pm_hiddens`` and the output layer.

    Each hidden layer applies a matrix product, a bias and ReLU; the output
    layer is linear.

    Args:
        x: input batch.

    Returns:
        (output, activations): ``activations`` starts with ``x`` and then
        holds the result of each hidden layer in order.
    """
    global pm_output, pm_hiddens

    activations = [x]
    for layer in pm_hiddens:
        previous = activations[-1]
        activations.append(relu(previous @ layer['w'] + layer['b']))

    output = activations[-1] @ pm_output['w'] + pm_output['b']
    return output, activations

In [None]:
def backprop_neuralnet(G_output, hiddens):
    """Run the backward pass for the currently selected architecture.

    Dispatches to the single-hidden-layer version when the global
    ``hidden_config`` is None and to the multi-layer version otherwise.

    Args:
        G_output: gradient of the loss with respect to the network output.
        hiddens: auxiliary data returned by the matching forward pass.
    """
    if hidden_config is None:
        backprop_neuralnet_hidden1(G_output, hiddens)
        return
    backprop_neuralnet_hiddens(G_output, hiddens)

def backprop_neuralnet_hidden1(G_output, aux):
    """Backward pass and in-place parameter update for the one-hidden-layer network.

    Args:
        G_output: gradient of the loss with respect to the network output.
        aux: the [x, hidden] pair produced by forward_neuralnet_hidden1.

    The arrays in ``pm_output`` and ``pm_hidden`` are updated in place by a
    step of size LEARNING_RATE against their gradients.
    """
    global pm_output, pm_hidden

    x, hidden = aux

    # Gradients of the output layer parameters.
    grad_w_out = np.matmul(hidden.T, G_output)
    grad_b_out = np.sum(G_output, axis=0)

    # Gradient flowing into the hidden layer. This must use the output
    # weights as they were in the forward pass, so it is computed before
    # pm_output is updated below.
    grad_hidden = np.matmul(G_output, pm_output['w'].T)
    grad_hidden = grad_hidden * relu_derv(hidden)

    pm_output['w'] -= LEARNING_RATE * grad_w_out
    pm_output['b'] -= LEARNING_RATE * grad_b_out

    # Gradients of the hidden layer parameters.
    grad_w_hid = np.matmul(x.T, grad_hidden)
    grad_b_hid = np.sum(grad_hidden, axis=0)

    pm_hidden['w'] -= LEARNING_RATE * grad_w_hid
    pm_hidden['b'] -= LEARNING_RATE * grad_b_hid
    
def backprop_neuralnet_hiddens(G_output, aux):
    """Backward pass and in-place parameter update for the multi-layer network.

    Args:
        G_output: gradient of the loss with respect to the network output.
        aux: list produced by forward_neuralnet_hiddens; entry 0 is the input
            and entry k+1 is the output of hidden layer k.

    The arrays in ``pm_output`` and in every entry of ``pm_hiddens`` are
    updated in place by a step of size LEARNING_RATE against their gradients.
    """
    global pm_output, pm_hiddens

    activations = aux

    # Output layer: parameter gradients, then the gradient handed to the last
    # hidden layer (taken before the output weights are changed).
    grad_w_out = np.matmul(activations[-1].T, G_output)
    grad_b_out = np.sum(G_output, axis=0)
    grad_act = np.matmul(G_output, pm_output['w'].T)

    pm_output['w'] -= LEARNING_RATE * grad_w_out
    pm_output['b'] -= LEARNING_RATE * grad_b_out

    # Walk the hidden layers from last to first.
    for idx in range(len(pm_hiddens) - 1, -1, -1):
        layer = pm_hiddens[idx]
        layer_in = activations[idx]
        layer_out = activations[idx + 1]

        grad_pre = grad_act * relu_derv(layer_out)
        grad_w = np.matmul(layer_in.T, grad_pre)
        grad_b = np.sum(grad_pre, axis=0)

        # Propagate with the pre-update weights, then apply the update.
        grad_act = np.matmul(grad_pre, layer['w'].T)

        layer['w'] -= LEARNING_RATE * grad_w
        layer['b'] -= LEARNING_RATE * grad_b

def relu_derv(y):
    """Return the element-wise sign of ``y``.

    The backward passes in this file call it with the output of relu, for
    which the sign is 1 where the unit was active and 0 elsewhere.
    """
    slope = np.sign(y)
    return slope

In [10]:
def forward_postproc(output, y):
    """Turn network outputs into a scalar loss.

    Args:
        output: network outputs, used as logits.
        y: targets.

    Returns:
        (loss, [y, output, entropy]): the mean of the element-wise sigmoid
        cross-entropy, plus the values needed by backprop_postproc.
    """
    entropy = sigmoid_cross_entropy_with_logits(y, output)
    return np.mean(entropy), [y, output, entropy]

def backprop_postproc(G_loss, aux):
    """Back-propagate the loss gradient to the network output.

    Args:
        G_loss: gradient of the final objective with respect to the loss.
        aux: the [y, output, entropy] list returned by forward_postproc.

    Returns:
        Gradient with respect to ``output``.
    """
    y, output, entropy = aux

    # The loss is a mean, so each entropy element contributes 1 / element count.
    mean_scale = 1.0 / np.prod(entropy.shape)
    G_entropy = mean_scale * G_loss

    return sigmoid_cross_entropy_with_logits_derv(y, output) * G_entropy

In [11]:
def eval_accuracy(output, y):
    """Fraction of elements where the predicted class matches the target class.

    A prediction is positive when ``output`` is greater than 0; a target is
    positive when ``y`` is greater than 0.5.
    """
    predicted_positive = np.greater(output, 0)
    actual_positive = np.greater(y, 0.5)
    return np.mean(np.equal(predicted_positive, actual_positive))

In [12]:
def relu(x):
    """Element-wise rectifier: the larger of ``x`` and 0."""
    rectified = np.maximum(x, 0)
    return rectified

def sigmoid(x):
    """Element-wise logistic sigmoid.

    Both exponentials are taken of non-positive values (-relu(-x) and -|x|),
    so the computation does not overflow for large |x|.
    """
    numerator = np.exp(-relu(-x))
    denominator = 1.0 + np.exp(-np.abs(x))
    return numerator / denominator

def sigmoid_derv(x,y):
    """Return ``y * (1 - y)``.

    Args:
        x: not used by the computation.
        y: value the result is computed from; the name suggests it is meant
            to be the sigmoid output for ``x`` (not enforced here).
    """
    complement = 1 - y
    return y * complement

def sigmoid_cross_entropy_with_logits(z,x):
    """Element-wise sigmoid cross-entropy computed directly from logits.

    Args:
        z: target values (forward_postproc passes the labels ``y`` here).
        x: logits (forward_postproc passes the network output here).

    Returns:
        ``relu(x) - x * z + log(1 + exp(-|x|))`` evaluated element-wise.
    """
    # log1p keeps the last term accurate when exp(-|x|) is far below machine
    # epsilon; np.log(1 + tiny) would round that term to exactly 0.
    return relu(x) - x * z + np.log1p(np.exp(-np.abs(x)))

def sigmoid_cross_entropy_with_logits_derv(z,x):
    """Return ``sigmoid(x) - z``, used by backprop_postproc as the gradient of
    the sigmoid cross-entropy with respect to the logits ``x``.

    Args:
        z: target values.
        x: logits.
    """
    probability = sigmoid(x)
    return -z + probability

In [13]:
# Experiment 1: one hidden layer of 6 units. Passing an int makes set_hidden
# clear hidden_config, so the single-hidden-layer code path is used.
set_hidden(6)
pulsar_exec(epoch_count=50, mb_size=10, report=10)

은닉 계층 하나를 갖는 다층 퍼셉트론이 작동되었습니다.
Epoch 10: loss =0.094, accuracy=0.972/0.972
Epoch 20: loss =0.091, accuracy=0.973/0.974
Epoch 30: loss =0.091, accuracy=0.973/0.974
Epoch 40: loss =0.089, accuracy=0.974/0.974
Epoch 50: loss =0.089, accuracy=0.974/0.975

Final Test: final accuracy = 0.975


In [14]:
# Experiment 2: the same single 6-unit hidden layer, but given as a list so
# that hidden_config is set and the general multi-layer code path is used.
set_hidden([6])
pulsar_exec(epoch_count=50, mb_size=10, report=10)

은닉 계층 1개를 갖는 다층 퍼셉트론이 작동되었습니다.
Epoch 10: loss =0.090, accuracy=0.973/0.969
Epoch 20: loss =0.087, accuracy=0.974/0.969
Epoch 30: loss =0.085, accuracy=0.975/0.973
Epoch 40: loss =0.083, accuracy=0.976/0.972
Epoch 50: loss =0.083, accuracy=0.976/0.972

Final Test: final accuracy = 0.972


In [15]:
# Experiment 3: two hidden layers of 12 and 6 units, trained for 200 epochs
# with a progress report every 40 epochs.
set_hidden([12,6])
pulsar_exec(epoch_count=200, mb_size=10, report=40)

은닉 계층 2개를 갖는 다층 퍼셉트론이 작동되었습니다.
Epoch 40: loss =0.090, accuracy=0.973/0.972
Epoch 80: loss =0.086, accuracy=0.975/0.974
Epoch 120: loss =0.084, accuracy=0.976/0.971
Epoch 160: loss =0.081, accuracy=0.976/0.972
Epoch 200: loss =0.081, accuracy=0.976/0.973

Final Test: final accuracy = 0.973
