# 당뇨병 판단하기 (logistic_classification)

In [1]:
import numpy as np
import matplotlib.pyplot as plt 
import tensorflow as tf

tf.random.set_seed(777)
tf.__version__

'2.1.0'

#### 데이터 확인 

In [2]:
xy = np.loadtxt('../datas/data-03-diabetes.csv', delimiter=',', dtype=np.float32)
x_train = xy[:, 0:-1]
y_train = xy[:, [-1]]

print(x_train.shape, y_train.shape)
print(xy)

(759, 8) (759, 1)
[[-0.294118   0.487437   0.180328  ... -0.53117   -0.0333333  0.       ]
 [-0.882353  -0.145729   0.0819672 ... -0.766866  -0.666667   1.       ]
 [-0.0588235  0.839196   0.0491803 ... -0.492741  -0.633333   0.       ]
 ...
 [-0.411765   0.21608    0.180328  ... -0.857387  -0.7        1.       ]
 [-0.882353   0.266332  -0.0163934 ... -0.768574  -0.133333   0.       ]
 [-0.882353  -0.0653266  0.147541  ... -0.797609  -0.933333   1.       ]]


#### 데이터 셋 만들기 
* Tensorflow data API를 통해 학습시킬 값들을 담는다 (Batch Size는 한번에 학습시킬 Size로 정한다)

In [3]:
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))  # 한번에 759개씩 읽어서 처리 

#### 모델 생성 
* W1 = tf.Variable(tf.random.normal([8, 1]), name="weight")은 아래와 동일한 표현 
* b1 = tf.Variable(tf.random.normal([1]), name='bias')         

In [4]:
W = tf.Variable(tf.random.normal((8, 1)), name="weight")    # 8 X 1 
b = tf.Variable(tf.random.normal((1,)), name='bias')        # , 조심 -> 출력 형태와 동일 
print(W, b)

<tf.Variable 'weight:0' shape=(8, 1) dtype=float32, numpy=
array([[ 0.77064806],
       [ 0.37335405],
       [-0.05576323],
       [ 0.00358377],
       [-0.5898363 ],
       [ 1.5702795 ],
       [ 0.2460895 ],
       [-0.09918973]], dtype=float32)> <tf.Variable 'bias:0' shape=(1,) dtype=float32, numpy=array([-1.3080608], dtype=float32)>


#### 필요한 함수 
* sigmoid
* cost 
* acc
* grad 

In [10]:
# 1. Sigmoid 함수를 가설로 선언
def logistic_regression(x):
    hypothesis = tf.divide(1., 1. + tf.exp(tf.matmul(x, W) + b))
    return hypothesis

# 2. 가설을 검증할 Cost 함수를 정의¶
def loss_fn(hypothesis, x, y):
    cost = -tf.reduce_mean(y * tf.math.log(logistic_regression(x)) + (1 - y) * tf.math.log(1-hypothesis))
    return cost

# 3. 정확도를 계산할 Accuracy 함수 정의
def accuracy_fn(hypothesis, y):
    predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, y), dtype=tf.int32))
    return accuracy

# 4. GradientTape를 통한 Grad(경사값 계산) 함수
def grad(hypothesis, x, y):
    with tf.GradientTape() as tape:
        loss_value = loss_fn(logistic_regression(x), x, y)
    return tape.gradient(loss_value, [W, b])

#### 최적화기 설정 

In [11]:
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

#### 학습을 실행합니다.

In [15]:
EPOCHS = 1001

for step in range(EPOCHS):
    for X, Y in iter (dataset):
        grads = grad(logistic_regression(X), X, Y)
        optimizer.apply_gradients(grads_and_vars=zip(grads, [W, b]))
        if step % 100 == 0:
            print('step : {}, Loss : {:.4f}'.format(step, loss_fn(logistic_regression(X), X, Y)))

step : 0, Loss : 0.6542
step : 100, Loss : 0.6180
step : 200, Loss : 0.5975
step : 300, Loss : 0.5851
step : 400, Loss : 0.5767
step : 500, Loss : 0.5702
step : 600, Loss : 0.5647
step : 700, Loss : 0.5598
step : 800, Loss : 0.5553
step : 900, Loss : 0.5512
step : 1000, Loss : 0.5474
