# logistic_regression_diabetes
### 당뇨병 진단 2진 분류 모델

In [1]:
import tensorflow as tf
import numpy as np
tf.random.set_seed(5)

In [4]:
# Load the dataset. Every row is one sample; the last column is the label
# and the remaining columns are the input features.
xy = np.loadtxt('data-03-diabetes.csv', delimiter=',', dtype=np.float32)

# Split point derived from the data instead of a hard-coded row count, so the
# cell keeps a 70/30 split if the file changes. For the 759-row file this is
# int(759 * 0.7) == 531, i.e. exactly the original split.
# NOTE(review): rows are split in file order without shuffling — confirm the
# file is not sorted by label.
TRAIN_RATIO = 0.7
n_train = int(len(xy) * TRAIN_RATIO)

# Training split: the first 70% of the rows.
x_train = xy[:n_train, :-1]     # features
y_train = xy[:n_train, [-1]]    # labels, kept 2-D as a column
print(x_train.shape,y_train.shape)

# Evaluation split: the remaining 30% of the rows.
x_test = xy[n_train:, :-1]      # features
y_test = xy[n_train:, [-1]]     # labels, kept 2-D as a column
print(x_test.shape,y_test.shape)

(531, 8) (531, 1)
(228, 8) (228, 1)


In [5]:
# Trainable parameters: weight matrix W and bias b.
# Matrix-product shape rule: (m, n) x (n, l) -> (m, l).
# W needs one row per input feature and a single output column so that
# x_train @ W has the same shape as y_train, e.g. (531, 8) x (8, 1) -> (531, 1).
# The feature count is read from the data rather than hard-coded as 8.
n_features = x_train.shape[1]
W = tf.Variable(tf.random.normal([n_features, 1]), name='weight')
# The bias length matches W's last dimension (one bias per output column).
b = tf.Variable(tf.random.normal([1]), name='bias')

In [6]:
# Model function: H(X) = sigmoid(X·W + b)
def hypothesis(X):
    """Return the model output for a batch of inputs.

    Args:
        X: 2-D array or tensor whose column count matches the rows of the
           module-level weight matrix ``W``.

    Returns:
        Tensor of sigmoid activations, one row per input row; every value
        lies between 0 and 1.
    """
    logits = tf.add(tf.matmul(X, W), b)
    return tf.sigmoid(logits)

In [7]:
# Cost function: log loss (binary cross-entropy) for the binary classifier.
def cost_func():
    """Return the mean log loss of the model on the training split.

    Reads the module-level ``x_train`` and ``y_train`` and takes no arguments,
    so it can be passed directly to ``optimizer.minimize`` as a loss callable.

    Returns:
        Scalar tensor: -mean(y * log(p) + (1 - y) * log(1 - p)),
        where p = hypothesis(x_train).
    """
    # A saturated sigmoid yields exactly 0.0 or 1.0 in float32; log(0) is -inf
    # and 0 * -inf is NaN, which would poison the loss and gradients. Keep the
    # probabilities strictly inside (0, 1) before taking the log.
    eps = 1e-7
    # Run the forward pass once and reuse it for both terms.
    probs = tf.clip_by_value(hypothesis(x_train), eps, 1.0 - eps)
    cost = -tf.reduce_mean(
        y_train * tf.math.log(probs)                 # vanishes where y == 0
        + (1 - y_train) * tf.math.log(1 - probs)     # vanishes where y == 1
    )
    return cost

In [8]:
# Gradient-based optimisation: build an Adam optimizer object
# with the learning rate (step size) set to 0.01.
LEARNING_RATE = 0.01
optimizer = tf.optimizers.Adam(learning_rate=LEARNING_RATE)

In [9]:
# Training: apply 10001 optimizer updates to W and b, logging every 1000th step.
print('***** Start Learning!!')
for step in range(10001):
    optimizer.minimize(cost_func, var_list=[W, b])
    if step % 1000 != 0:
        continue
    # The cost is evaluated after the update, so it reflects the new parameters.
    current_cost = cost_func().numpy()
    print('%04d'%step, 'cost:[', current_cost, ']',
          ' W:', W.numpy(), ' b:', b.numpy())

print('***** Learning Finished!!')

***** Start Learning!!
0000 cost:[ 0.8006094 ]  W: [[-0.19030674]
 [-0.9402892 ]
 [-0.02964094]
 [-0.7525406 ]
 [ 1.3131522 ]
 [-0.6285474 ]
 [ 0.84406626]
 [-0.09899963]]  b: [0.23652883]
1000 cost:[ 0.48674077 ]  W: [[-0.75487643]
 [-3.362079  ]
 [ 0.2107454 ]
 [-0.58146983]
 [-0.16941816]
 [-2.5226405 ]
 [-0.76775914]
 [ 0.12745774]]  b: [0.4801401]
2000 cost:[ 0.4866587 ]  W: [[-0.76455224]
 [-3.3315148 ]
 [ 0.24341255]
 [-0.54766756]
 [-0.21579498]
 [-2.5522876 ]
 [-0.8637516 ]
 [ 0.11404333]]  b: [0.38385937]
3000 cost:[ 0.4866587 ]  W: [[-0.7646867 ]
 [-3.3311477 ]
 [ 0.24389562]
 [-0.5480688 ]
 [-0.21599658]
 [-2.5523124 ]
 [-0.865107  ]
 [ 0.11381964]]  b: [0.38240343]
4000 cost:[ 0.48665863 ]  W: [[-0.7646869 ]
 [-3.3311465 ]
 [ 0.24389595]
 [-0.5480693 ]
 [-0.215997  ]
 [-2.5523133 ]
 [-0.86510783]
 [ 0.1138196 ]]  b: [0.38240215]
5000 cost:[ 0.4866587 ]  W: [[-0.76468694]
 [-3.3311462 ]
 [ 0.24389583]
 [-0.5480689 ]
 [-0.21599719]
 [-2.552314  ]
 [-0.8651085 ]
 [ 0.11381963

In [11]:
# Show the fitted regression coefficients: the weight matrix, then the bias.
for label, variable in (('weight :', W), ('Bias: ', b)):
    print(label, variable.numpy())

weight : [[-0.764687  ]
 [-3.331145  ]
 [ 0.2438961 ]
 [-0.5480689 ]
 [-0.2159974 ]
 [-2.5523152 ]
 [-0.8651095 ]
 [ 0.11381921]]
Bias:  [0.3824004]


In [10]:
# Prediction: print the model's sigmoid output for every row of the test split.
print('***** Predict')

tx_test = np.array(x_test, dtype=np.float32)   # float32 copy of the test features
test_probabilities = hypothesis(tx_test)
print(test_probabilities)

***** Predict
tf.Tensor(
[[0.5186423 ]
 [0.65317553]
 [0.7049229 ]
 [0.7473892 ]
 [0.89631176]
 [0.93097717]
 [0.1671316 ]
 [0.08234812]
 [0.7229071 ]
 [0.48988405]
 [0.2578944 ]
 [0.8827405 ]
 [0.91010207]
 [0.812207  ]
 [0.9372864 ]
 [0.9064792 ]
 [0.79980516]
 [0.83449954]
 [0.7937141 ]
 [0.50166076]
 [0.83170664]
 [0.6545904 ]
 [0.06006374]
 [0.8877376 ]
 [0.8943105 ]
 [0.77172846]
 [0.9227856 ]
 [0.85504556]
 [0.89721924]
 [0.54396963]
 [0.66321665]
 [0.91610706]
 [0.8380459 ]
 [0.843129  ]
 [0.891571  ]
 [0.5948661 ]
 [0.73433125]
 [0.84799093]
 [0.530768  ]
 [0.5834661 ]
 [0.08651686]
 [0.23346478]
 [0.86579895]
 [0.7474394 ]
 [0.6957312 ]
 [0.65634143]
 [0.95374775]
 [0.42014793]
 [0.86635745]
 [0.2487074 ]
 [0.9420323 ]
 [0.30538973]
 [0.73137134]
 [0.598598  ]
 [0.8734594 ]
 [0.58074236]
 [0.17434463]
 [0.80170673]
 [0.9398475 ]
 [0.31207523]
 [0.9222123 ]
 [0.90323305]
 [0.8974244 ]
 [0.8382085 ]
 [0.3942761 ]
 [0.28756097]
 [0.68965137]
 [0.11290926]
 [0.9649532 ]
 [0.25530

In [12]:
# Accuracy measurement: turn model probabilities into hard 0/1 predictions.
def predict(X, threshold=0.5):
    """Return binary class predictions for a batch of inputs.

    Args:
        X: Inputs accepted by ``hypothesis``.
        threshold: Decision boundary on the model output. Outputs strictly
            greater than this value are labelled 1.0, all others 0.0.
            Defaults to 0.5, the value that was previously hard-coded.

    Returns:
        float32 tensor of 0.0 / 1.0 values with the same shape as
        ``hypothesis(X)``.
    """
    return tf.cast(hypothesis(X) > threshold, dtype=tf.float32)

# Evaluate on the held-out test split (70% train / 30% test).
preds = predict(x_test)
is_correct = tf.equal(preds, y_test)   # element-wise match against the labels
accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

print('Accuracy:',accuracy.numpy()) # Accuracy: 0.78070176
# Optional inspection of the raw outputs and hard predictions:
# print('Hypothesis:\n',hypothesis(x_test).numpy())
# print('Predict:\n',preds.numpy())

Accuracy: 0.78070176
