데이터 다운로드 주소입니다. 
https://www.kaggle.com/pavanraj159/predicting-a-pulsar-star

In [14]:
%run ../../leeyua/AI_CODE/AI_abalone.ipynb 

## A.1 실험용 메인 함수 정의

In [15]:
def pulsar_exec(epoch_count = 10,mb_size = 10, report=1 ): 
    load_pulsar_dataset()     
    init_model()            
    train_and_test(epoch_count, mb_size, report)

## B.1 데이터 적재 함수 정의

In [16]:
def load_pulsar_dataset(): 
    with open('pulsar_stars.csv') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)
        rows = [] 
        for row in csvreader:
            rows.append(row)
    
    global data, input_cnt, output_cnt  
    input_cnt, output_cnt = 8,1
    data = np.asarray(rows, dtype = 'float32') 

#### * B.2,3 / C.1~5 / D.1 은 단층퍼셉트론에서 정의한 것을 그대로 활용 

## D.2 / E.2 후처리 과정에 대한 순전파와 역전파 함수의 재정의

In [17]:
def forward_postproc(output, y): 
    entropy = sigmoid_cross_entropy_with_logits(y, output)
    loss = np.mean(entropy) 
    return loss, [y,output,entropy] 

def backprop_postproc(G_loss, aux): 
    y, output, entropy = aux 
    
    g_loss_entropy = 1.0 / np.prod(entropy.shape)
    g_entropy_output = sigmoid_cross_entropy_with_logits_derv(y,output)
    
    G_entropy = g_loss_entropy * G_loss
    G_output = g_entropy_output * G_entropy

    return G_output

## D.4 정확도 계산 정의 함수

In [18]:
def eval_accuracy(output,y):
    estimate = np.greater(output, 0) 
    answer = np.greater(y,0.5)
    correct = np.equal(estimate, answer)
  
    return np.mean(correct)  

## E. ~ 시그모이드 관련 함수 정의

In [19]:
def relu(x):
    return np.maximum(x,0) 

def sigmoid(x): 
    return np.exp(-relu(-x)) / (1.0 + np.exp(-np.abs(x)))

def sigmoid_derv(x,y):
    return y * (1-y)

def sigmoid_cross_entropy_with_logits(z,x): 
    return relu(x) - x * z + np.log( 1 + np.exp(-np.abs(x)))

def sigmoid_cross_entropy_with_logits_derv(z,x):
    return -z + sigmoid(x)

## 천체의 펄서여부 판정신경망 학습시키기

In [20]:
pulsar_exec()

Epoch 1: loss=0.154, accuracy=0.959/0.972
Epoch 2: loss=0.131, accuracy=0.966/0.972
Epoch 3: loss=0.136, accuracy=0.967/0.970
Epoch 4: loss=0.133, accuracy=0.968/0.970
Epoch 5: loss=0.121, accuracy=0.968/0.969
Epoch 6: loss=0.145, accuracy=0.968/0.974
Epoch 7: loss=0.122, accuracy=0.970/0.975
Epoch 8: loss=0.127, accuracy=0.970/0.976
Epoch 9: loss=0.125, accuracy=0.970/0.976
Epoch 10: loss=0.134, accuracy=0.968/0.976

Final Test: final accuracy = 0.976


## 펄서 or 일반 별 확인 

In [21]:
x = np.array([130,52,0.4,0.6,3.1,20,8,72])
output = forward_neuralnet(x)
print(output)
x_sig= sigmoid(output[0])
new_data = np.greater(x_sig,0.5)
print(new_data)

(array([-6.51508291]), array([130. ,  52. ,   0.4,   0.6,   3.1,  20. ,   8. ,  72. ]))
[False]


### ----학습데이터 증폭 및 정확도계산 정의 함수 재정의----

## A.1 메인 실행함수 재정의

In [22]:
def pulsar_exec(epoch_count=10,mb_size=10, report=1, adjust_ratio = False):
    load_pulsar_dataset(adjust_ratio)
    init_model()
    train_and_test(epoch_count, mb_size, report)

## B.1 데이터 적재 함수 재정의 

In [23]:
def load_pulsar_dataset(adjust_ratio): 
    pulsars, stars = [], []  
    with open('pulsar_stars.csv') as csvfile: 
        csvreader = csv.reader(csvfile)
        next(csvreader, None)
        rows = [] 
        for row in csvreader:
            if row[8] == '1' : pulsars.append(row) 
            else : stars.append(row) 
    global data, input_cnt, output_cnt  
    input_cnt, output_cnt, = 8,1
    
    star_cnt, pulsar_cnt = len(stars), len(pulsars) 
    
    if adjust_ratio:
        data = np.zeros([2*star_cnt , 9]) 
        data[0:star_cnt, :] = np.asarray(stars, dtype='float32')

        for n in range(star_cnt):
            data[star_cnt+n]=np.asarray(pulsars[n % pulsar_cnt], dtype='float32')
    else: 
        data = np.zeros([star_cnt+pulsar_cnt,9]) 
        data[0:star_cnt, :] = np.asarray(stars, dtype="float32")
        data[star_cnt:, :] = np.asarray(pulsars, dtype="float32")
    

## D.4 정확도 계산 정의 함수 재정의 

In [24]:
def eval_accuracy(output, y):
    est_yes = np.greater(output, 0) 
    ans_yes = np.greater(y, 0.5) 
   
    est_no = np.logical_not(est_yes)
    ans_no = np.logical_not(ans_yes) 
     
    tp = np.sum(np.logical_and(est_yes, ans_yes))
    fp = np.sum(np.logical_and(est_no, ans_yes))  
    fn = np.sum(np.logical_and(est_yes,ans_no))
    tn = np.sum(np.logical_and(est_no,ans_no))
 
    accuracy = safe_div(tp+tn,tp+tn+fn+fp)
   
    precision = safe_div(tp, tp + fp)
   
    recall = safe_div(tp, tp + fn)

    f1 = 2 * safe_div(recall * precision, recall + precision)
   
    return [accuracy, precision, recall, f1]


def safe_div(p,q):
    p,q = float(p),float(q)
    if np.abs(q) < 1.0e-20: return np.sign(p)
    
    return p / q

## B.3 출력문 수정을 위한 실행함수 재정의

In [25]:
def train_and_test(epoch_count, mb_size, report):
    step_count = arrange_data(mb_size)
    test_x, test_y = get_test_data()
    
    for epoch in range(epoch_count): 
        losses = []
        
        for n in range(step_count):
            train_x, train_y = get_train_data(mb_size, n)
            loss,_ = run_train(train_x,train_y)
            losses.append(loss)
            
        if report > 0 and (epoch+1) % report == 0:
            acc = run_test(test_x,test_y) 
            acc_str = ','.join(['%5.3f']*4) % tuple(acc)
           
            print('Epoch{}:loss = {:5.3f},result={}'.format(epoch+1,np.mean(losses),acc_str))
            
  
    acc = run_test(test_x, test_y)
    acc_str = ','.join(['%5.3f']*4) % tuple(acc)
    print('\n Final Test: final result = {}'.format(acc_str))
    
    

In [None]:
#acc_str = ','.join(['%5.3f']*4) % tuple([1,2,3,4])

## 실행하기 (adjust_ratio = False)

In [26]:
pulsar_exec(adjust_ratio = False)

Epoch1:loss = 0.136,result=0.972,0.794,0.879,0.835
Epoch2:loss = 0.124,result=0.973,0.732,0.959,0.830
Epoch3:loss = 0.127,result=0.966,0.629,0.990,0.770
Epoch4:loss = 0.129,result=0.973,0.717,0.970,0.824
Epoch5:loss = 0.126,result=0.961,0.841,0.752,0.794
Epoch6:loss = 0.134,result=0.973,0.769,0.911,0.834
Epoch7:loss = 0.130,result=0.975,0.757,0.949,0.842
Epoch8:loss = 0.132,result=0.972,0.723,0.955,0.823
Epoch9:loss = 0.120,result=0.975,0.754,0.953,0.842
Epoch10:loss = 0.123,result=0.971,0.692,0.982,0.812

 Final Test: final result = 0.971,0.692,0.982,0.812


## 실행하기 (adjust_ratio = True)

In [27]:
pulsar_exec(adjust_ratio=True)

Epoch1:loss = 0.402,result=0.907,0.926,0.892,0.909
Epoch2:loss = 0.371,result=0.916,0.892,0.937,0.914
Epoch3:loss = 0.351,result=0.915,0.848,0.980,0.909
Epoch4:loss = 0.374,result=0.917,0.854,0.976,0.911
Epoch5:loss = 0.374,result=0.900,0.933,0.875,0.903
Epoch6:loss = 0.354,result=0.922,0.870,0.970,0.918
Epoch7:loss = 0.371,result=0.919,0.907,0.929,0.918
Epoch8:loss = 0.344,result=0.925,0.897,0.950,0.923
Epoch9:loss = 0.363,result=0.910,0.833,0.984,0.902
Epoch10:loss = 0.359,result=0.925,0.888,0.959,0.922

 Final Test: final result = 0.925,0.888,0.959,0.922
