In [16]:
import tensorflow as tf
import numpy as np
# Fix TensorFlow's global random seed so the tf.random.normal draws used to
# initialize the model parameters are repeatable from run to run.
tf.random.set_seed(5)

In [17]:
# Load the data. Every column except the last is used as a feature (X);
# the last column is used as the target (Y).
xy = np.loadtxt('caesarian.csv', delimiter=',', dtype=np.float32)
print(xy.shape)        # (80, 6) for the current file

# Train/test split: the first 70% of the rows (in file order, no shuffling)
# are used for training and the remaining 30% for evaluation.
# The split index is derived from the row count instead of being hard-coded,
# so it stays correct if the CSV changes size.
TRAIN_RATIO = 0.7
n_train = int(round(len(xy) * TRAIN_RATIO))   # 56 of 80 rows

# Training data
x_train = xy[:n_train, :-1]     # X
y_train = xy[:n_train, [-1]]    # Y, indexed with [-1] to keep it 2-D: (rows, 1)
print(x_train.shape, y_train.shape)

# Test data
x_test = xy[n_train:, :-1]      # X
y_test = xy[n_train:, [-1]]     # Y, 2-D like y_train
print(x_test.shape, y_test.shape)

(80, 6)
(56, 5) (56, 1)
(24, 5) (24, 1)


In [18]:
# Initialize the trainable variables: weight and bias.
# Matrix-product shape rule: (m, n) x (n, l) = (m, l).
# Here x_train is (56, 5), so W must be (5, 1) for x_train @ W to have the
# same shape as y_train, (56, 1).
# The feature count is read from x_train rather than hard-coded so W always
# matches the data.
n_features = x_train.shape[1]
W = tf.Variable(tf.random.normal([n_features, 1]), name='weight')
# The bias has one entry per output column, i.e. the last dimension of W.
b = tf.Variable(tf.random.normal([1]), name='bias')

In [19]:
# Prediction function (hypothesis): H(X) = sigmoid(X·W + b)
def hypothesis(X):
    """Return the model's sigmoid output for the rows of ``X``.

    Computes ``sigmoid(X @ W + b)`` using the notebook-level variables
    ``W`` and ``b``; each output value lies between 0 and 1.
    """
    logits = tf.matmul(X, W) + b
    return tf.sigmoid(logits)

In [20]:
# Cost function: log loss (binary cross-entropy) for the binary classifier
def cost_func():
    """Return the mean binary cross-entropy of the model on the training set.

    Mathematically this is
        -mean(y * log(H(x)) + (1 - y) * log(1 - H(x)))
    with H(x) = sigmoid(x @ W + b), evaluated on ``x_train`` / ``y_train``.

    The loss is computed directly from the logits with
    ``tf.nn.sigmoid_cross_entropy_with_logits`` rather than by taking the log
    of the sigmoid output: when the sigmoid saturates to exactly 0 or 1 the
    explicit form evaluates log(0) = -inf and 0 * -inf = NaN, which poisons
    the cost and its gradients. Working from logits also needs only one
    forward pass instead of two.
    """
    logits = tf.matmul(x_train, W) + b
    per_sample_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=y_train, logits=logits)
    return tf.reduce_mean(per_sample_loss)

In [21]:
# Gradient-based optimization: create the Adam optimizer object with the
# learning rate set to 0.01.
LEARNING_RATE = 0.01
optimizer = tf.optimizers.Adam(learning_rate=LEARNING_RATE)

In [22]:
# Training
N_STEPS = 10001      # number of optimizer updates
LOG_EVERY = 1000     # print the cost and parameters every LOG_EVERY steps
trainable_vars = [W, b]

print('***** Start Learning!!')
for step in range(N_STEPS):
    # Explicit GradientTape + apply_gradients performs the same update as
    # optimizer.minimize(cost_func, var_list=[W, b]) but does not depend on
    # the `minimize` helper, which Keras 3 based optimizers do not provide.
    with tf.GradientTape() as tape:
        cost = cost_func()
    grads = tape.gradient(cost, trainable_vars)
    optimizer.apply_gradients(zip(grads, trainable_vars))

    if step % LOG_EVERY == 0:
        # Re-evaluate the cost so the logged value reflects the parameters
        # after this step's update.
        print('%04d'%step,'cost:[',cost_func().numpy(),']',
             ' W:',W.numpy(),' b:',b.numpy())

print('***** Learning Finished!!')

***** Start Learning!!
0000 cost:[ 3.2573545 ]  W: [[-0.17030635]
 [-0.940286  ]
 [-0.02964031]
 [-0.73254037]
 [ 1.3331525 ]]  b: [0.23652889]
1000 cost:[ 0.5308221 ]  W: [[-0.03349107]
 [ 0.25211978]
 [-0.27006328]
 [ 0.15151864]
 [ 2.5541186 ]]  b: [-0.17207605]
2000 cost:[ 0.5292896 ]  W: [[-0.01470615]
 [ 0.2631409 ]
 [-0.2468881 ]
 [ 0.19900672]
 [ 2.552211  ]]  b: [-0.77784324]
3000 cost:[ 0.5291536 ]  W: [[-0.00793587]
 [ 0.26635006]
 [-0.23871742]
 [ 0.21688206]
 [ 2.5552855 ]]  b: [-0.99656326]
4000 cost:[ 0.52915204 ]  W: [[-0.00714858]
 [ 0.26674047]
 [-0.23777401]
 [ 0.21897122]
 [ 2.5557613 ]]  b: [-1.0220594]
5000 cost:[ 0.52915204 ]  W: [[-0.00713164]
 [ 0.2667489 ]
 [-0.23775375]
 [ 0.21901618]
 [ 2.555772  ]]  b: [-1.0226083]
6000 cost:[ 0.529152 ]  W: [[-0.00713156]
 [ 0.26674873]
 [-0.23775361]
 [ 0.21901639]
 [ 2.5557725 ]]  b: [-1.0226104]
7000 cost:[ 0.52915204 ]  W: [[-0.00713149]
 [ 0.2667488 ]
 [-0.23775363]
 [ 0.21901666]
 [ 2.555773  ]]  b: [-1.0226127]
8000

In [23]:
# Prediction: print the model's sigmoid outputs for the test inputs
print('***** Predict')

tx_test = np.array(x_test, dtype=np.float32)   # float32 copy of the test features
test_outputs = hypothesis(tx_test)
print(test_outputs)

***** Predict
tf.Tensor(
[[0.86615884]
 [0.28789198]
 [0.37677163]
 [0.8862466 ]
 [0.27883387]
 [0.33796775]
 [0.32797652]
 [0.88669693]
 [0.82365775]
 [0.37291485]
 [0.44940922]
 [0.8888288 ]
 [0.28207877]
 [0.2391767 ]
 [0.29377595]
 [0.753536  ]
 [0.38466218]
 [0.9108603 ]
 [0.88338655]
 [0.3317173 ]
 [0.50700676]
 [0.3785572 ]
 [0.19628417]
 [0.2856133 ]], shape=(24, 1), dtype=float32)


In [24]:
# Accuracy measurement: accuracy computation
def predict(X):
    """Return hard 0/1 class predictions for the rows of ``X``.

    A row is labelled 1.0 when ``hypothesis(X)`` is strictly greater than 0.5
    and 0.0 otherwise; the result is a float32 tensor.
    """
    is_positive = tf.greater(hypothesis(X), 0.5)
    return tf.cast(is_positive, dtype=tf.float32)

# Predict on the test data and compare against the true labels.
preds = predict(x_test)
is_correct = tf.equal(preds, y_test)
# Accuracy is the fraction of test rows whose prediction equals the label.
accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

print('Accuracy:',accuracy.numpy())

Accuracy: 0.45833334
