In [22]:
import numpy as np
import csv

np.random.seed(1010)

RND_MEAN = 0
RND_STD = 0.003

LEARNING_RATE = 0.003

In [23]:
# 앞서 작성한 회귀 모델을 실행
%run ./Neural_Network_Regression.ipynb

Epoch 1 : Train - loss = 33.823, accuracy = 0.560 / Test = 0.811
Epoch 2 : Train - loss = 8.076, accuracy = 0.820 / Test = 0.808
Epoch 3 : Train - loss = 7.444, accuracy = 0.812 / Test = 0.804
Epoch 4 : Train - loss = 7.343, accuracy = 0.811 / Test = 0.801
Epoch 5 : Train - loss = 7.267, accuracy = 0.810 / Test = 0.802
Epoch 6 : Train - loss = 7.202, accuracy = 0.811 / Test = 0.801
Epoch 7 : Train - loss = 7.147, accuracy = 0.810 / Test = 0.801
Epoch 8 : Train - loss = 7.101, accuracy = 0.810 / Test = 0.802
Epoch 9 : Train - loss = 7.056, accuracy = 0.812 / Test = 0.799
Epoch 10 : Train - loss = 7.022, accuracy = 0.810 / Test = 0.800

 Result : final accuracy = 0.800


In [24]:
def binary_classification_exec(epoch_count = 10, mb_size = 10, report = 1, train_rate = 0.75):
    binary_load_dataset()
    init_model()
    train_and_test(epoch_count, mb_size, report, train_rate)

In [25]:
# 데이터 전처리 과정
def binary_load_dataset():
    with open('./data/pulsar_stars.csv') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)
        rows = []
        for row in csvreader:
            rows.append(row)

    global data,input_cnt,output_cnt

    input_cnt, output_cnt = 8,1 # 입출력 벡터 수
    data = np.asarray(rows, dtype='float32') #현재 리스트 구조의 변수를 배열구조로 변환 -> numpy 연산을 효율적으로 사용하기 위함

In [26]:
# 순전파 과정
def forward_postproc(output, y):
    CEE = sigmoid_cross_entropy_with_logits(y,output) # 시그모이드 교차 엔트로피 함수
    loss = np.mean(CEE)

    return loss, [y, output, CEE] # 리스트 : 역전파 과정을 위함

In [27]:
# 손실함수에 대한 역전파
def backprop_postproc(G_loss, aux):
    y, output, CEE = aux
    G_loss = 1.0

    g_loss_entropy = 1.0 / np.prod(CEE.shape)
    g_entropy_output = sigmoid_cross_entropy_with_logits_derv(y,output)

    G_entropy = g_loss_entropy * G_loss
    G_output = g_entropy_output * G_entropy

    return G_output

In [31]:
# 시그모이드 관련 함수 정의

# 0 과 x 중 큰 값을 출력
def relu(x):
    return np.maximum(x,0)

# 시그모이드 함수
def sigmoid(x):
    return np.exp(-relu(-x)) / (1 + np.exp(-np.abs(x)))

# 시그모이드 함수의 편미분
def sigmoid_derv(x,y):
    return y *(1-y)

# 시그모이드 교차 엔트로피 
def sigmoid_cross_entropy_with_logits(z,x):
    return relu(x) - x * z + np.log(1+np.exp(-np.abs(x)))

# 시그모이드 교차 엔트로피의 편미분( x에 대한 편미분으로 진행))
def sigmoid_cross_entropy_with_logits_derv(z,x):
    return -z + sigmoid(x)


In [29]:
# 이진 분류 문제에 대한 정확도 판단
def eval_accuracy(output,y):
    # np.greater(a,b) : a 가 b보다 크면 1(True), 작다면 0(False)
    estimate = np.greater(output,0)
    answer = np.greater(y,0.5)
    correct = np.equal(estimate, answer)

    return np.mean(correct)


In [32]:
binary_classification_exec()

Epoch 1 : Train - loss = 0.140, accuracy = 0.961 / Test = 0.969
Epoch 2 : Train - loss = 0.127, accuracy = 0.967 / Test = 0.965
Epoch 3 : Train - loss = 0.125, accuracy = 0.967 / Test = 0.972
Epoch 4 : Train - loss = 0.134, accuracy = 0.968 / Test = 0.970
Epoch 5 : Train - loss = 0.128, accuracy = 0.970 / Test = 0.972
Epoch 6 : Train - loss = 0.124, accuracy = 0.970 / Test = 0.970
Epoch 7 : Train - loss = 0.107, accuracy = 0.971 / Test = 0.971
Epoch 8 : Train - loss = 0.118, accuracy = 0.971 / Test = 0.969
Epoch 9 : Train - loss = 0.121, accuracy = 0.970 / Test = 0.973
Epoch 10 : Train - loss = 0.129, accuracy = 0.970 / Test = 0.971

 Result : final accuracy = 0.971


In [None]:
'''
위의 결과는 상당한 결과지만, 사실 착시현상(?)이 존재한다
알고자 하는 Target_class의 데이터 불균형으로 인한 결과..( 0 : 16000개, 1 : 1600개 )
따라서 균형을 위해 데이터의 증폭이 필요 -> 일부 함수의 수정한다..!
'''

In [33]:
# 기존의 함수를 일부 수정한다 / adjust_ratio : 데이터 증폭 여부에 대한 인자
def binary_classification_exec(epoch_count = 10, mb_size = 10, report = 1, train_rate = 0.75,adjust_ratio = False):
    binary_load_dataset(adjust_ratio)
    init_model()
    train_and_test(epoch_count, mb_size, report, train_rate)

In [44]:
# 데이터 전처리 과정 + 증촉 여부에 따른 추가 전처리 과정
def binary_load_dataset(adjust_ratio):
    pulsars, stars = [], []         # 데이터를 따로 저장하기 위한 빈 객체 생성
    with open('./data/pulsar_stars.csv') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)

        for row in csvreader:       # 이전에는 한 번에 저장했다면, 이번엔 불균형의 열을 따로 저장
            if row[8] == '1' :      # target_class 부분을 따로 데이터 저장
                pulsars.append(row)
            else:
                stars.append(row)   # 그 외 데이터 저장

    global data,input_cnt,output_cnt

    input_cnt, output_cnt = 8,1 # 입출력 벡터 수
    star_cnt, pulsar_cnt = len(stars), len(pulsars)
    if adjust_ratio:    # adjust_ratio 가 참일 경우(데이터 증폭을 진행할 경우)
        data = np.zeros([2*star_cnt,9]) # 기존의 data 버퍼 크기보다 크게 설정(행 길이 2배, 열은 9개를 의미)
        data[0:star_cnt,:] = np.asarray(stars,dtype='float32')  # 위 크기의 절반은 stars 데이터

        for n in range(star_cnt):   # 나머지 절반은 pulsars 데이터 저장
            data[star_cnt + n] = np.asarray(pulsars[n % pulsar_cnt], dtype='float32')


    else:   # adjust_ratio 가 거짓이라면, 이전과 동일하게 저장
        data = np.zeros([star_cnt + pulsar_cnt, 9])
        data[0:star_cnt,:] = np.asarray(stars,dtype='float32')
        data[star_cnt:,:] = np.asarray(pulsar_cnt,dtype='float32')


In [35]:
# 이진 분류 문제에 대한 정확도 판단
def eval_accuracy(output,y):
    # np.greater(a,b) : a 가 b보다 크면 1(True), 작다면 0(False)
    est_yes = np.greater(output,0)
    ans_yes = np.greater(y,0.5)

    est_no = np.logical_not(est_yes)
    ans_no = np.logical_not(ans_yes)

    TP = np.sum(np.logical_and(est_yes,ans_yes))
    FP = np.sum(np.logical_and(est_yes,ans_no))
    FN = np.sum(np.logical_and(est_no,ans_yes))
    TN = np.sum(np.logical_and(est_no,ans_no))

    accuracy = safe_div(TP+TN, TP+FP+FN+TN)
    precision = safe_div(TP,TP+FP)
    recall = safe_div(TP,TP+FN)

    f1 = 2 * safe_div(recall * precision, recall + precision)
    

    return [accuracy, precision, recall, f1]


In [36]:
# 분모가 0일 경우, 분자의 부호에 맞는 부호로 변환
def safe_div(p,q):
    # 형 변환
    p, q = float(p), float(q)
    if np.abs(q) < 1.0e-20:
        return np.sign(p)
    
    return p / q

In [38]:
# 학습 및 평가
def train_and_test(epoch_count, mb_size, report, train_rate):
    step_count = arrange_data(mb_size, train_rate)
    test_x, test_y = get_test_data()

    for epoch in range(epoch_count):
        losses = []

        for n in range(step_count):
            train_x, train_y= get_train_data(mb_size, n)
            loss, _ = run_train(train_x,train_y)
            losses.append(loss)
        
        if report > 0 and (epoch + 1) % report == 0:
            acc = run_test(test_x, test_y)
            print("Epoch {} : Train - loss = {:5.3f}, accuracy = {:5.3f}, precision = {:5.3f}, recall = {:5.3f}, f1 = {:5.3f}".\
                format(epoch+1, np.mean(losses), acc[0], acc[1], acc[2], acc[3]))
    
    final_acc = run_test(test_x,test_y)
    print('\n Result : final accuracy = {:5.3f}, precision = {:5.3f}, recall = {:5.3f}, f1 = {:5.3f}'.format(final_acc[0],final_acc[1],final_acc[2],final_acc[3]))

In [47]:
binary_classification_exec(epoch_count=1000, report=100, mb_size=10, adjust_ratio=False)

Epoch 100 : Train - loss = -63298719093271.656, accuracy = 0.094, precision = 0.094, recall = 1.000, f1 = 0.172
Epoch 200 : Train - loss = -126916015462562.141, accuracy = 0.094, precision = 0.094, recall = 1.000, f1 = 0.172
Epoch 300 : Train - loss = -190533289488288.000, accuracy = 0.094, precision = 0.094, recall = 1.000, f1 = 0.172
Epoch 400 : Train - loss = -254150581130592.062, accuracy = 0.094, precision = 0.094, recall = 1.000, f1 = 0.172
Epoch 500 : Train - loss = -317767880507534.875, accuracy = 0.094, precision = 0.094, recall = 1.000, f1 = 0.172
Epoch 600 : Train - loss = -381385182032679.500, accuracy = 0.094, precision = 0.094, recall = 1.000, f1 = 0.172
Epoch 700 : Train - loss = -445002453480455.812, accuracy = 0.094, precision = 0.094, recall = 1.000, f1 = 0.172
Epoch 800 : Train - loss = -508619739107502.375, accuracy = 0.094, precision = 0.094, recall = 1.000, f1 = 0.172
Epoch 900 : Train - loss = -572237021726713.500, accuracy = 0.094, precision = 0.094, recall = 1.

In [46]:
binary_classification_exec(epoch_count=1000, report=100, mb_size=10, adjust_ratio=True)

Epoch 100 : Train - loss = 0.342, accuracy = 0.914, precision = 0.984, recall = 0.839, f1 = 0.906
Epoch 200 : Train - loss = 0.351, accuracy = 0.930, precision = 0.977, recall = 0.879, f1 = 0.926
Epoch 300 : Train - loss = 0.323, accuracy = 0.930, precision = 0.945, recall = 0.912, f1 = 0.928
Epoch 400 : Train - loss = 0.318, accuracy = 0.919, precision = 0.988, recall = 0.847, f1 = 0.912
Epoch 500 : Train - loss = 0.310, accuracy = 0.931, precision = 0.980, recall = 0.880, f1 = 0.927
Epoch 600 : Train - loss = 0.309, accuracy = 0.939, precision = 0.964, recall = 0.911, f1 = 0.937
Epoch 700 : Train - loss = 0.329, accuracy = 0.936, precision = 0.972, recall = 0.897, f1 = 0.933
Epoch 800 : Train - loss = 0.314, accuracy = 0.932, precision = 0.942, recall = 0.919, f1 = 0.930
Epoch 900 : Train - loss = 0.310, accuracy = 0.928, precision = 0.928, recall = 0.926, f1 = 0.927
Epoch 1000 : Train - loss = 0.297, accuracy = 0.935, precision = 0.968, recall = 0.899, f1 = 0.932

 Result : final ac