### logistic_regression_diabetes
### : 당뇨병 진단 2진 분류 모델


In [1]:
import tensorflow as tf
import numpy as np
# Fix TensorFlow's global random seed so the random initial values drawn
# later in the notebook (tf.random.normal for W and b) are repeatable.
tf.random.set_seed(5)

In [7]:
# Load the dataset. Every row holds the feature columns followed by the
# label in the last column; the file used for the recorded run has shape
# (759, 9).
xy = np.loadtxt('data-03-diabetes.csv', delimiter=',', dtype=np.float32)

# Derive the 70% / 30% split point from the data instead of hard-coding it
# (integer arithmetic: 759 rows -> 531 training rows, 228 test rows).
n_train = len(xy) * 7 // 10

# Training split: the first 70% of the rows
x_train = xy[:n_train, :-1]    # X: every column except the last
y_train = xy[:n_train, [-1]]   # Y: last column; [-1] keeps it 2-D, shape (m, 1)
print(x_train.shape)    # (531, 8) for the 759-row file
print(y_train.shape)    # (531, 1)

# Test split: the remaining 30% of the rows
x_test = xy[n_train:, :-1]
y_test = xy[n_train:, [-1]]
print(x_test.shape)     # (228, 8)
print(y_test.shape)     # (228, 1)

(531, 8)
(531, 1)
(228, 8)
(228, 1)


In [8]:
# Trainable parameters: weight matrix and bias, drawn from a normal
# distribution.
# Matrix product shapes: (m, n) x (n, l) -> (m, l),
# here (531, 8) x (8, 1) -> (531, 1).
# W is created before b so the order of the random draws is unchanged.
W = tf.Variable(
    initial_value=tf.random.normal(shape=(8, 1)),
    name='weight',
)
b = tf.Variable(
    initial_value=tf.random.normal(shape=(1,)),
    name='bias',
)

In [9]:
# Prediction function (hypothesis): H(X) = sigmoid(X @ W + b)
def hypothesis(X):
    """Return the sigmoid of ``X @ W + b`` using the module-level W and b.

    The sigmoid squashes every output into the range between 0 and 1.
    """
    logits = tf.matmul(X, W) + b
    return tf.sigmoid(logits)

In [10]:
# Cost function: log loss (binary cross-entropy) for the 2-class model
def cost_func():
    """Return the mean binary cross-entropy of the model on the training set.

    Reads the module-level ``x_train`` / ``y_train`` and takes no arguments,
    so it can be handed to an optimizer as a zero-argument loss callable.
    """
    # Run the forward pass once and reuse it for both log terms.
    # Clip the predictions away from exact 0 and 1 so tf.math.log never
    # yields -inf (which would turn the cost and its gradients into NaN)
    # when the sigmoid saturates.
    eps = 1e-7
    probs = tf.clip_by_value(hypothesis(x_train), eps, 1.0 - eps)
    cost = -tf.reduce_mean(y_train * tf.math.log(probs) +
                           (1 - y_train) * tf.math.log(1 - probs))
    return cost

In [11]:
# Optimizer used to minimise the cost by gradient-based updates.
# Creates an Adam optimizer object with learning_rate set to 0.01.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

In [12]:
# Training loop: 10001 optimisation steps, logging every 100th step
print('***** Start Learning!!')
trainable_vars = [W, b]
for step in range(10001):
    # Perform the update explicitly (record the loss on a tape, take its
    # gradients, apply them) rather than through Optimizer.minimize, which
    # newer Keras optimizer implementations do not appear to provide.
    # NOTE(review): confirm against the installed TensorFlow/Keras version.
    with tf.GradientTape() as tape:
        loss = cost_func()
    grads = tape.gradient(loss, trainable_vars)
    optimizer.apply_gradients(zip(grads, trainable_vars))

    if step % 100 == 0:
        # Re-evaluate the cost after the update so the logged value matches
        # the W and b printed alongside it.
        print('%04d'%step,'cost:[',cost_func().numpy(),']',
             ' W:',W.numpy(),' b:',b.numpy())

print('***** Learning Finished!!')

***** Start Learning!!
0000 cost:[ 0.8006094 ]  W: [[-0.19030674]
 [-0.9402892 ]
 [-0.02964094]
 [-0.7525406 ]
 [ 1.3131522 ]
 [-0.6285474 ]
 [ 0.84406626]
 [-0.09899963]]  b: [0.23652883]
0100 cost:[ 0.52727795 ]  W: [[-0.65102977]
 [-1.9078277 ]
 [ 0.2328079 ]
 [-1.2235698 ]
 [ 0.7060906 ]
 [-1.4086568 ]
 [ 0.3165263 ]
 [-0.42911732]]  b: [0.64593804]
0200 cost:[ 0.50196886 ]  W: [[-0.7003719 ]
 [-2.62869   ]
 [ 0.16853508]
 [-1.2493113 ]
 [ 0.4307475 ]
 [-1.8856589 ]
 [ 0.07900752]
 [-0.2611641 ]]  b: [0.6689282]
0300 cost:[ 0.49334785 ]  W: [[-0.71150446]
 [-3.0237336 ]
 [ 0.19624333]
 [-1.1704347 ]
 [ 0.25556892]
 [-2.1660438 ]
 [-0.13201909]
 [-0.10620011]]  b: [0.6788833]
0400 cost:[ 0.48995656 ]  W: [[-7.1795529e-01]
 [-3.2340927e+00]
 [ 2.1906504e-01]
 [-1.0466371e+00]
 [ 1.3929063e-01]
 [-2.3197513e+00]
 [-3.0422285e-01]
 [ 1.3167990e-03]]  b: [0.67131776]
0500 cost:[ 0.48841515 ]  W: [[-0.7261078 ]
 [-3.3358548 ]
 [ 0.22019187]
 [-0.91839755]
 [ 0.0538345 ]
 [-2.4010112 ]
 [

In [13]:
# Print the learned regression coefficients: the weight, then the bias
for label, variable in (('Weight:', W), ('Bias:', b)):
    print(label, variable.numpy())

Weight: [[-0.764687  ]
 [-3.331145  ]
 [ 0.2438961 ]
 [-0.5480689 ]
 [-0.2159974 ]
 [-2.5523152 ]
 [-0.8651095 ]
 [ 0.11381921]]
Bias: [0.3824004]


In [14]:
# Accuracy measurement: turn predicted probabilities into class labels
def predict(X):
    """Return 1.0 where ``hypothesis(X)`` is above 0.5 and 0.0 elsewhere."""
    probabilities = hypothesis(X)
    is_positive = tf.math.greater(probabilities, 0.5)
    return tf.cast(is_positive, dtype=tf.float32)

# Predict on the held-out test split (70% train : 30% test)
preds = predict(x_test)

# Accuracy = fraction of predictions that equal the true label
is_correct = tf.equal(preds, y_test)
accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

print('Accuracy:', accuracy.numpy())  # recorded run printed 0.78070176
# Optional debugging output:
# print('Hypothesis:\n',hypothesis(x_test).numpy())
# print('Predict:\n',preds.numpy())

Accuracy: 0.78070176
