In [1]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
import keras
import tensorflow as tf

df = pd.read_csv('./bigdata_num.csv', encoding="utf-8-sig")

In [3]:
def relu(x):
    return np.maximum(0,x)

In [4]:
import numpy as np  
import csv

# 실험 결과를 재현하기 위해 난수 발생패턴을 고정시키는 np.random.seed()함수값을 설정
np.random.seed(333)

In [5]:
RND_MEAN = 0  # 평균
RND_STD = 0.003  # 표준편차
Learning_rate = 0.003  # 학습률

In [6]:
def binary_classification_exec(epoch_count=10, mb_size=10, report=1,train_rate = 0.8):
    binary_load_dataset()
    init_model()
    train_and_test(epoch_count, mb_size, report, train_rate)

In [7]:
# 데이터 받기 > 버퍼로 옮겨주기 > np.array()로 감싸주기
def binary_load_dataset():
    with open('./bigdata_num.csv', encoding="utf-8-sig") as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)
        rows = []
        for row in csvreader:
            rows.append(row)
            
    global data, input_cnt, output_cnt
    input_cnt, output_cnt = 8, 1  # 독립변수, 종속변수독립변수, 종속변수
    data = np.asarray(rows, dtype='float32') # 배열 구조로 변환하는 과정

In [8]:
def init_model():
    global weight, bias, input_cnt, output_cnt
    weight = np.random.normal(RND_MEAN, RND_STD,[input_cnt, output_cnt])  
    # normal 메서드 :  (mean, sd, shape)
    
    bias = np.zeros([output_cnt])
    # weight 가중치 행렬은 [8,1] , bias 편향 벡터는 [1]형태

In [9]:
def train_and_test(epoch_count, mb_size, report, train_rate):
    step_count = arrange_data(mb_size, train_rate)
    test_x, test_y = get_test_data()

    for epoch in range(epoch_count):
        losses, accs = [], []

        for n in range(step_count):
            train_x, train_y = get_train_data(mb_size, n)
            loss, acc = run_train(train_x, train_y)
            losses.append(loss)
            accs.append(acc)

        if report > 0 and (epoch + 1) % report == 0:
            acc = run_test(test_x, test_y)
            print(f'Epoch {epoch+1}: loss={np.mean(losses):5.3f}, accuracy={np.mean(accs):5.3f}/{acc:5.3f}')
            
    final_acc = run_test(test_x, test_y)
    print(f'\nFinal Test: final accuracy = {final_acc:5.3f}')

In [10]:
def arrange_data(mb_size, train_rate):
    global data, shuffle_map, test_begin_idx

    shuffle_map = np.arange(data.shape[0])
    np.random.shuffle(shuffle_map)

    step_count = int(data.shape[0] * train_rate) // mb_size

    test_begin_idx = step_count * mb_size

    return step_count


def get_test_data():
    global data, shuffle_map, test_begin_idx, output_cnt

    test_data = data[shuffle_map[test_begin_idx:]]

    return test_data[:, :-output_cnt], test_data[:, -output_cnt:]


def get_train_data(mb_size, nth):
    global data, shuffle_map, test_begin_idx, output_cnt

    if nth == 0:
        np.random.shuffle(shuffle_map[:test_begin_idx])

    train_data = data[shuffle_map[mb_size * nth:mb_size * (nth + 1)]]

    return train_data[:, :-output_cnt], train_data[:, -output_cnt:]
    
    
def run_train(x, y):
    output, aux_nn = forward_neuralnet(x)
    loss, aux_pp = forward_postproc(output, y)
    accuracy = eval_accuracy(output, y)

    G_loss = 1.0
    G_output = backprop_postproc(G_loss, aux_pp)
    backprop_neuralnet(G_output, aux_nn)

    return loss, accuracy


def run_test(x, y):
    output, _ = forward_neuralnet(x)
    accuracy = eval_accuracy(output, y)
    
    return accuracy

In [11]:
def forward_neuralnet(x):  
    global weight, bias
    output = np.matmul(x, weight) + bias  # y = xw + b
    
    return output, x

In [12]:
def sigmoid_cross_entropy_with_logits(z, x):
    return relu(x) - x * z + np.log(1 + np.exp(-np.abs(x)))

In [13]:
def backprop_neuralnet(G_output, x):  
    global weight, bias
    g_output_w = x.transpose()

    G_w = np.matmul(g_output_w, G_output) 
    G_b = np.sum(G_output, axis=0)

    weight -= Learning_rate * G_w
    bias -= Learning_rate * G_b

In [14]:
def forward_postproc(output, y):
    CEE = sigmoid_cross_entropy_with_logits(y, output)
    loss = np.mean(CEE)

    return loss, [y, output, CEE]

In [15]:
def backprop_postproc(G_loss, aux):
    y, output, entropy = aux
    
    g_loss_entropy = 1.0 / np.prod(entropy.shape)
    g_entropy_output = sigmoid_cross_entropy_with_logits(y, output)    
    
    G_entropy = g_loss_entropy * G_loss
    G_output = g_entropy_output * G_entropy
    
    return G_output

In [16]:
def eval_accuracy(output, y):
    estimate = np.greater(output, 0)
    answer = np.greater(y, 0.5)
    correct = np.equal(estimate, answer)

    return np.mean(correct)

In [17]:
binary_classification_exec()

Epoch 1: loss=267708712937061072811082121216.000, accuracy=0.997/0.999
Epoch 2: loss=1484596432988807147884937434021558451376447712461064241152.000, accuracy=0.999/0.999
Epoch 3: loss=4250191803154452937874564718334700957627832654766553186303269923469723581078994849628160.000, accuracy=0.999/0.999
Epoch 4: loss=8370679326554694357046277023662395553692666302203094534888613418402972748643399930799415949758675809295442706545246208.000, accuracy=0.999/0.999
Epoch 5: loss=6024995605249498963638741234232412095916670859153270729367921551713251168884456019342659097096858905803985559907249389400894414141348377161669541888.000, accuracy=0.999/0.999
Epoch 6: loss=32449256287360461727076770341032623828644192865383579390827673638817794413733250590701882582820294190284478778975770983758024943078892171495102624709804857562305691532095878332416.000, accuracy=0.999/0.999
Epoch 7: loss=18389104359376870522259887506074230556302020647928737249548464511060263499245117932775479849455944075322417456911698099