In [1]:
import pandas as pd
import numpy as np
import csv

In [2]:
# Load the dataset: skip the header row and keep every remaining row.
# newline='' leaves newline handling to the csv module, which is what the
# csv documentation recommends for file objects passed to csv.reader.
with open('Pulsar.csv', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    next(csvreader, None)  # drop the header line; the None default avoids StopIteration on an empty file
    rows = list(csvreader)

# Number of input columns and output columns expected in each row.
input_cnt, output_cnt = 8,1
# Convert the string cells read by csv.reader into a float32 array.
data = np.asarray(rows, dtype='float32')

In [50]:
# Model parameters (weight, bias) and training hyperparameters.
RND_SEED = 1234         # fixed seed so the random draws below are repeatable across runs
RND_MEAN = 0            # mean of the normal distribution used for weight initialization
RND_STD = 0.0030        # standard deviation of that distribution
LEARNING_RATE = 0.001

# Seed NumPy's global generator before anything random is drawn. Every later
# np.random.* call in the notebook uses this same global state, so a
# Restart & Run All reproduces the same numbers.
np.random.seed(RND_SEED)

weight = np.random.normal(RND_MEAN,RND_STD,[input_cnt,output_cnt])
bias = np.zeros([output_cnt])

epoch_count = 100       # number of passes over the training data
mb_size = 20            # number of rows per minibatch
report = 1              # reporting interval in epochs; 0 turns per-epoch reporting off

In [51]:
# Build a randomly ordered index over all rows. Roughly the first 80% of it,
# rounded down to a whole number of minibatches, is the training range;
# everything from test_begin_idx onward is the remainder.
shuffle_map = np.arange(len(data))
np.random.shuffle(shuffle_map)
step_count = int(len(data) * 0.8) // mb_size
test_begin_idx = mb_size * step_count

#학습에 사용하게될 함수 정의
def relu(x):
    """Return the elementwise maximum of x and 0 (negative entries become 0)."""
    rectified = np.maximum(x, 0)
    return rectified
def sigmoid(x):
    """Logistic sigmoid evaluated without exponentiating a positive number.

    -relu(-x) equals min(x, 0) and -|x| is never positive, so both exp()
    calls receive non-positive arguments and cannot overflow. For x >= 0 the
    expression reduces to 1 / (1 + e^-x); for x < 0 it is e^x / (1 + e^x).
    """
    numerator = np.exp(-relu(-x))
    denominator = 1.0 + np.exp(-np.abs(x))
    return numerator / denominator
def sigmid_derv(x,y):
    """Sigmoid derivative written in terms of the sigmoid's output.

    Args:
        x: Unused; kept so the signature stays unchanged.
        y: Expected to be the sigmoid output for x (the function itself only
            evaluates the formula below and does not check this).

    Returns:
        y * (1 - y), elementwise.
    """
    return y * (1-y)

# Correctly spelled alias, matching the naming of the other sigmoid_* helpers.
# The original misspelled name stays defined so existing calls keep working.
sigmoid_derv = sigmid_derv
def sigmoid_cross_entropy_with_logits(z,x):
    """Elementwise sigmoid cross-entropy computed directly from logits.

    Evaluates max(x, 0) - x * z + log(1 + e^-|x|). The exponential only ever
    receives a non-positive argument, so it cannot overflow.

    Args:
        z: Target values (labels), broadcastable against x.
        x: Logits, i.e. the pre-sigmoid scores.

    Returns:
        The per-element loss, with the broadcast shape of z and x.
    """
    # np.maximum(x, 0) is the same operation relu(x) performs.
    # log1p keeps the tiny tail term for large |x|; log(1 + tiny) would round
    # the sum to 1 first and return exactly 0.
    return np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))
def sigmoid_cross_entropy_with_logits_derv(z,x):
    """Gradient of the sigmoid cross-entropy with respect to the logits x.

    Args:
        z: Target values (labels).
        x: Logits.

    Returns:
        sigmoid(x) - z, elementwise.
    """
    predicted = sigmoid(x)
    return predicted - z

# Train with minibatch gradient descent, report per-epoch metrics, then
# evaluate once more on the test split.

def _binary_accuracy(logits, labels):
    """Fraction of entries where the predicted class matches the label.

    A logit above 0 counts as a positive prediction; a label above 0.5
    counts as a positive answer.
    """
    return np.mean(np.equal(np.greater(logits,0),np.greater(labels,0.5)))

# The test rows never change: only shuffle_map[:test_begin_idx] is reshuffled
# during training. Slicing them once, before the loop, also guarantees that
# test_x / test_y exist for the final evaluation even when per-epoch
# reporting is disabled (report == 0) or no epoch triggers a report.
test_data = data[shuffle_map[test_begin_idx:]]
test_x = test_data[:, :-output_cnt]
test_y = test_data[:, -output_cnt:]

for epoch in range(epoch_count):
    losses, accs = [], []

    # Reorder the training indices at the start of every epoch. The slice is
    # a view, so shuffling it rearranges shuffle_map itself in place.
    np.random.shuffle(shuffle_map[:test_begin_idx])

    for n in range(step_count):
        # Gather the minibatch rows once, then split inputs from targets.
        batch = data[shuffle_map[mb_size*n:mb_size*(n+1)]]
        train_x = batch[:, :-output_cnt]
        train_y = batch[:, -output_cnt:]

        # Forward pass: linear logits, then the mean cross-entropy loss.
        output_train_x = np.matmul(train_x,weight) + bias
        entropy = sigmoid_cross_entropy_with_logits(train_y,output_train_x)
        loss = np.mean(entropy)

        # Training accuracy for this minibatch.
        accuracy = _binary_accuracy(output_train_x,train_y)

        losses.append(loss)
        accs.append(accuracy)

        # Backpropagation: chain rule from the scalar loss back to the
        # parameters. Lowercase g_* are local derivatives, uppercase G_* are
        # accumulated gradients of the loss.
        G_loss = 1.0

        # d(mean)/d(entropy element) is 1 / (number of elements).
        g_loss_entropy = 1.0 / np.prod(entropy.shape)
        g_entropy_output = sigmoid_cross_entropy_with_logits_derv(train_y,output_train_x)

        G_entropy = g_loss_entropy * G_loss
        G_output = g_entropy_output * G_entropy

        # output = x @ w + b, so d(output)/d(w) contributes x transposed.
        g_output_w = train_x.transpose()

        G_w = np.matmul(g_output_w,G_output)
        G_b = np.sum(G_output, axis=0)

        # Gradient-descent step, updating the parameter arrays in place.
        weight -= LEARNING_RATE * G_w
        bias -= LEARNING_RATE * G_b

    # Every `report` epochs, print the mean training loss and accuracy over
    # the epoch, followed by the accuracy on the test split.
    if report > 0 and (epoch+1) % report == 0:
        output_test_x = np.matmul(test_x,weight) + bias
        acc = _binary_accuracy(output_test_x,test_y)
        print('Epoch {}: loss={:5.3f}, accuracy={:5.3f}/{:5.3f}'
              .format(epoch+1,np.mean(losses),np.mean(accs),acc))

# Final evaluation on the test split with the trained parameters.
output_test_x = np.matmul(test_x,weight) + bias
final_acc = _binary_accuracy(output_test_x,test_y)
print('\nFinal Test : final accuracy = {:5.3f}'.format(final_acc))

Epoch 1: loss=0.122, accuracy=0.962/0.966
Epoch 2: loss=0.109, accuracy=0.968/0.956
Epoch 3: loss=0.107, accuracy=0.970/0.971
Epoch 4: loss=0.112, accuracy=0.969/0.967
Epoch 5: loss=0.105, accuracy=0.970/0.973
Epoch 6: loss=0.100, accuracy=0.972/0.973
Epoch 7: loss=0.099, accuracy=0.973/0.972
Epoch 8: loss=0.103, accuracy=0.973/0.972
Epoch 9: loss=0.101, accuracy=0.972/0.971
Epoch 10: loss=0.097, accuracy=0.973/0.974
Epoch 11: loss=0.110, accuracy=0.971/0.971
Epoch 12: loss=0.105, accuracy=0.971/0.966
Epoch 13: loss=0.107, accuracy=0.971/0.972
Epoch 14: loss=0.110, accuracy=0.971/0.973
Epoch 15: loss=0.107, accuracy=0.971/0.972
Epoch 16: loss=0.100, accuracy=0.973/0.954
Epoch 17: loss=0.106, accuracy=0.972/0.973
Epoch 18: loss=0.097, accuracy=0.974/0.974
Epoch 19: loss=0.102, accuracy=0.972/0.971
Epoch 20: loss=0.096, accuracy=0.974/0.975
Epoch 21: loss=0.101, accuracy=0.972/0.974
Epoch 22: loss=0.102, accuracy=0.973/0.653
Epoch 23: loss=0.101, accuracy=0.973/0.964
Epoch 24: loss=0.097