In [2]:
import pandas as pd
import numpy as np
import csv

In [3]:
# Load the file and build a class-balanced data matrix.
pulsars , stars = [],[]
# newline='' leaves newline handling to the csv module, as its documentation
# recommends for any file object handed to csv.reader.
with open('Pulsar.csv', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    next(csvreader, None)  # skip the header row
    rows = []
    for row in csvreader:
        if not row:
            # csv.reader yields [] for a blank line; row[8] would raise IndexError.
            continue
        # Column 8 is the label column: '1' goes to pulsars, anything else to stars.
        if row[8] == '1':
            pulsars.append(row)
        else:
            stars.append(row)
        rows.append(row)

input_cnt, output_cnt = 8,1
star_cnt, pulsar_cnt = len(stars), len(pulsars)
if star_cnt == 0 or pulsar_cnt == 0:
    # Without this guard an empty class surfaces later as an opaque
    # ZeroDivisionError (n % 0) or a NumPy broadcasting error.
    raise ValueError(
        'Pulsar.csv must contain at least one row of each class '
        '(stars={}, pulsars={})'.format(star_cnt, pulsar_cnt))

# First star_cnt rows: every star. Last star_cnt rows: the pulsars repeated
# cyclically, so both classes contribute the same number of rows.
data = np.zeros([2*star_cnt, input_cnt + output_cnt])
data[:star_cnt] = np.asarray(stars, dtype='float32')
pulsar_rows = np.asarray(pulsars, dtype='float32')
data[star_cnt:] = pulsar_rows[np.arange(star_cnt) % pulsar_cnt]

In [11]:
# Hyperparameters and trainable parameters (weight and bias).
RND_SEED = 1234        # seed for NumPy's global random generator
RND_MEAN = 0           # mean of the normal distribution used to initialise weight
RND_STD = 0.0030       # standard deviation of that distribution
LEARNING_RATE = 0.001  # step size applied to the gradients during training

# Seed the global generator so the weight initialisation below (and any
# np.random.shuffle call that runs after this cell) is repeatable run to run.
np.random.seed(RND_SEED)

weight = np.random.normal(RND_MEAN,RND_STD,[input_cnt,output_cnt])
bias = np.zeros([output_cnt])

epoch_count = 100  # number of passes over the training portion
mb_size = 10       # rows per mini-batch
report = 1         # print test metrics every `report` epochs

In [12]:
# Shuffle the row indices, then reserve the tail of the permutation for testing.
shuffle_map = np.arange(len(data))
np.random.shuffle(shuffle_map)
# Number of whole mini-batches that fit into 80% of the rows.
step_count = int(len(data) * 0.8) // mb_size
# Every index from this position onward in shuffle_map is held out.
test_begin_idx = mb_size * step_count

# Helper functions used during training
def relu(x):
    """Return the element-wise maximum of x and 0."""
    rectified = np.maximum(0, x)
    return rectified
def sigmoid(x):
    """Logistic sigmoid of x, arranged so that np.exp only receives non-positive arguments."""
    # exp(-relu(-x)) equals exp(x) for x < 0 and 1 otherwise.
    numerator = np.exp(-relu(-x))
    denominator = 1.0 + np.exp(-np.abs(x))
    return numerator / denominator
def sigmid_derv(x,y):
    """Return y * (1 - y).

    x is accepted but not used; only y enters the result.
    Presumably y is the sigmoid output for x, which would make this the
    sigmoid's derivative (confirm against callers).
    """
    return y * (1-y)

# Correctly spelled alias; the original misspelled name is kept so existing
# references keep working.
sigmoid_derv = sigmid_derv
def sigmoid_cross_entropy_with_logits(z,x):
    """Element-wise sigmoid cross-entropy between labels z and logits x.

    Evaluates max(x, 0) - x*z + log(1 + exp(-|x|)), so np.exp only ever
    receives non-positive arguments.

    Args:
        z: labels, broadcastable against x.
        x: logits (pre-sigmoid scores).

    Returns:
        The per-element loss, with the broadcast shape of z and x.
    """
    # np.log1p keeps the tiny tail term that np.log(1 + t) rounds to exactly 0
    # once t drops below float64 resolution (large |x|).
    # np.maximum(x, 0) is the ReLU term, written inline so this function has no
    # dependency on other helpers.
    return np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))
def sigmoid_cross_entropy_with_logits_derv(z,x):
    """Return sigmoid(x) - z for labels z and logits x."""
    predicted = sigmoid(x)
    return predicted - z
def safe_div(p, q):
    """Divide p by q, returning sign(p) when |q| is below 1e-20.

    Both arguments are converted to float before the comparison and division.
    """
    numer = float(p)
    denom = float(q)
    near_zero = np.abs(denom) < 1.0e-20
    return np.sign(numer) if near_zero else numer / denom

# Accuracy metrics
def eval_accuracy(output,y):
    """Compute [accuracy, precision, recall, f1] for binary predictions.

    A prediction counts as positive when its entry in `output` is > 0;
    a label counts as positive when its entry in `y` is > 0.5.
    """
    predicted_pos = np.greater(output, 0)
    actual_pos = np.greater(y, 0.5)
    predicted_neg = np.logical_not(predicted_pos)
    actual_neg = np.logical_not(actual_pos)

    def count_both(left, right):
        # Number of positions where both masks are True.
        return np.sum(np.logical_and(left, right))

    # Confusion-matrix cells.
    tp = count_both(predicted_pos, actual_pos)
    fp = count_both(predicted_pos, actual_neg)
    fn = count_both(predicted_neg, actual_pos)
    tn = count_both(predicted_neg, actual_neg)

    total = tp + tn + fp + fn
    accuracy = safe_div(tp + tn, total)
    precision = safe_div(tp, tp + fp)
    recall = safe_div(tp, tp + fn)
    f1 = 2 * safe_div(recall * precision, recall + precision)

    return [accuracy, precision, recall, f1]


# Train the model and report the results.

# The held-out rows are the tail of shuffle_map, which the per-epoch reshuffle
# below never touches, so the test split is sliced once up front. This also
# keeps test_x / test_y defined for the final evaluation when `report` is 0 or
# no epoch reaches a report (they used to exist only after a report had run).
test_data = data[shuffle_map[test_begin_idx:]]
test_x = test_data[:, :-output_cnt]
test_y = test_data[:, -output_cnt:]

for epoch in range(epoch_count):
    losses = []

    for n in range(step_count):
        if n == 0:
            # Reshuffle only the training indices at the start of every epoch.
            np.random.shuffle(shuffle_map[:test_begin_idx])
        # Gather the mini-batch rows once, then split features from labels.
        mb_data = data[shuffle_map[mb_size*n:mb_size*(n+1)]]
        train_x = mb_data[:, :-output_cnt]
        train_y = mb_data[:, -output_cnt:]

        # Forward pass: linear logits, per-sample cross-entropy, mean loss.
        output_train_x = np.matmul(train_x,weight) + bias
        entropy = sigmoid_cross_entropy_with_logits(train_y,output_train_x)
        loss = np.mean(entropy)
        losses.append(loss)

        # Backpropagation: chain rule from the scalar loss back to weight and bias.
        G_loss = 1.0

        # d(mean)/d(entropy) is 1/N for each of the N entries.
        g_loss_entropy = 1.0 / np.prod(entropy.shape)
        g_entorpy_output = sigmoid_cross_entropy_with_logits_derv(train_y,output_train_x)

        G_entropy = g_loss_entropy * G_loss
        G_output = g_entorpy_output * G_entropy

        # d(output)/d(weight) is the transposed input batch.
        g_outout_w = train_x.transpose()

        G_w = np.matmul(g_outout_w,G_output)
        G_b = np.sum(G_output, axis=0)

        # Gradient-descent update, applied in place.
        weight -= LEARNING_RATE * G_w
        bias -= LEARNING_RATE * G_b

    if report > 0 and (epoch+1) % report == 0:
        output_test_x = np.matmul(test_x,weight) + bias
        acc = eval_accuracy(output_test_x,test_y)
        acc_str = ','.join(['%5.3f']*4) % tuple(acc)
        print('Epoch {}: loss={:5.3f}, reslut={}'. \
              format(epoch+1,np.mean(losses),acc_str))

# Final evaluation on the held-out rows with the trained parameters.
output_test_x = np.matmul(test_x,weight) + bias
final_acc = eval_accuracy(output_test_x,test_y)
final_acc_str = ','.join(['%5.3f']*4) % tuple(final_acc)
print('\nFinal Test : final result = {}'.format(final_acc_str))

Epoch 1: loss=0.411, reslut=0.918,0.929,0.908,0.918
Epoch 2: loss=0.381, reslut=0.897,0.990,0.804,0.888
Epoch 3: loss=0.372, reslut=0.927,0.948,0.905,0.926
Epoch 4: loss=0.389, reslut=0.765,0.693,0.963,0.806
Epoch 5: loss=0.363, reslut=0.838,0.774,0.962,0.858
Epoch 6: loss=0.355, reslut=0.914,0.961,0.865,0.911
Epoch 7: loss=0.376, reslut=0.928,0.948,0.907,0.927
Epoch 8: loss=0.381, reslut=0.827,0.763,0.957,0.849
Epoch 9: loss=0.364, reslut=0.902,0.989,0.815,0.894
Epoch 10: loss=0.366, reslut=0.884,0.843,0.946,0.892
Epoch 11: loss=0.368, reslut=0.916,0.975,0.855,0.911
Epoch 12: loss=0.369, reslut=0.907,0.888,0.935,0.911
Epoch 13: loss=0.367, reslut=0.919,0.941,0.897,0.919
Epoch 14: loss=0.365, reslut=0.922,0.964,0.879,0.919
Epoch 15: loss=0.375, reslut=0.926,0.973,0.879,0.923
Epoch 16: loss=0.373, reslut=0.923,0.933,0.915,0.924
Epoch 17: loss=0.358, reslut=0.924,0.937,0.912,0.924
Epoch 18: loss=0.369, reslut=0.709,0.637,0.990,0.776
Epoch 19: loss=0.378, reslut=0.657,0.606,0.924,0.732
Ep