# 5장 Logistic (regression) classifier 실습

- logistic hypothesis

    - \\(H(X) = \frac{1}{1+{e}^{-W^{T}X}}\\)


- cost

    - \\(cost(W) = -\frac{1}{m}\sum{\left[y\log{(H(x))}+(1-y)\log{(1-H(x))}\right]}\\)


- Gradient descent algorithm

    - \\(W \leftarrow W - \alpha\frac{\partial}{\partial W}cost(W)\\)



## 간단한 학습데이터 사용

In [1]:
import tensorflow as tf

# Training data: six samples with two features each, and a binary label per sample.
x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]]
y_data = [[0], [0], [0], [1], [1], [1]]

# Placeholders that receive the training data; the batch dimension is left open.
X = tf.placeholder(tf.float32, shape=[None, 2])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Weight and bias to be learned, randomly initialised.
W = tf.Variable(tf.random_normal([2, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis: sigmoid of the linear model XW + b.
logits = tf.matmul(X, W) + b
hypothesis = tf.sigmoid(logits)

# Cost: mean binary cross-entropy, the same quantity as
#   -mean(Y * log(H) + (1 - Y) * log(1 - H)),
# but computed from the logits. Taking log() of the sigmoid output directly
# yields 0 * -inf = NaN whenever the sigmoid saturates to exactly 0 or 1,
# which can happen with an unlucky random initialisation.
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))

# Gradient descent
train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy
# predicted is 1.0 where hypothesis > 0.5, otherwise 0.0
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(10001):
        # One optimisation step on the full training set; fetch the cost too.
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
        if step % 500 == 0:
            print(step, cost_val)

    # Evaluate on the training data itself.
    # NOTE: the value printed after "Correct (Y)" is the thresholded
    # prediction (`predicted`), not the label tensor Y.
    h, p, a = sess.run([hypothesis, predicted, accuracy], feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect (Y): ", p, "\nAccuracy: ", a)

0 5.95485
500 0.598032
1000 0.513029
1500 0.463876
2000 0.424169
2500 0.389739
3000 0.35946
3500 0.332751
4000 0.309157
4500 0.288273
5000 0.269739
5500 0.253237
6000 0.238492
6500 0.225267
7000 0.21336
7500 0.202599
8000 0.192838
8500 0.183951
9000 0.175834
9500 0.168393
10000 0.161553

Hypothesis:  [[ 0.03597021]
 [ 0.16538167]
 [ 0.32898706]
 [ 0.77056962]
 [ 0.93255746]
 [ 0.97784287]] 
Correct (Y):  [[ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]] 
Accuracy:  1.0


## csv 학습데이터 사용

In [2]:
import numpy as np

# Load the CSV: every column except the last is a feature, the last is the label.
xy = np.loadtxt("data-03-diabetes.csv", delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]  # list index keeps the label as a 2-D column

# Take the feature count from the loaded data instead of hard-coding it.
num_features = x_data.shape[1]

# Placeholders for features and labels; the batch dimension is left open.
X = tf.placeholder(tf.float32, shape=[None, num_features])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Weight and bias to be learned, randomly initialised.
W = tf.Variable(tf.random_normal([num_features, 1]))
b = tf.Variable(tf.random_normal([1]))

# Hypothesis: sigmoid of the linear model XW + b.
logits = tf.matmul(X, W) + b
hypothesis = tf.sigmoid(logits)

# Mean binary cross-entropy computed from the logits. This is the same
# quantity as -mean(Y * log(H) + (1 - Y) * log(1 - H)) but does not produce
# NaN (0 * -inf) when the sigmoid saturates to exactly 0 or 1.
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))
train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# predicted is 1.0 where hypothesis > 0.5, otherwise 0.0; accuracy is the
# fraction of predictions equal to Y.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    feed = {X: x_data, Y: y_data}
    for step in range(10001):
        sess.run(train, feed_dict=feed)
        if step % 500 == 0:
            # Cost is evaluated in a separate run, i.e. after this step's update.
            print(step, sess.run(cost, feed_dict=feed))

    # Final evaluation on the same data that was used for training.
    h, c, a = sess.run([hypothesis, cost, accuracy], feed_dict=feed)
    print("\nHypothesis: ", h, "\nCost = ", c, "\nAccuracy = ", a)

0 1.06745
500 0.821333
1000 0.758128
1500 0.707558
2000 0.666385
2500 0.633156
3000 0.606484
3500 0.585105
4000 0.567922
4500 0.554037
5000 0.542732
5500 0.533454
6000 0.525773
6500 0.519363
7000 0.513971
7500 0.509403
8000 0.505504
8500 0.502156
9000 0.499263
9500 0.496749
10000 0.494553

Hypothesis:  [[ 0.44421098]
 [ 0.90937173]
 [ 0.27573535]
 [ 0.92891049]
 [ 0.33907938]
 [ 0.7176587 ]
 [ 0.94337398]
 [ 0.64541084]
 [ 0.30034021]
 [ 0.43381801]
 [ 0.62415636]
 [ 0.22052272]
 [ 0.24530472]
 [ 0.38184738]
 [ 0.74739212]
 [ 0.45729017]
 [ 0.69773835]
 [ 0.91790491]
 [ 0.80776095]
 [ 0.54343319]
 [ 0.60824037]
 [ 0.11972158]
 [ 0.61988002]
 [ 0.68375254]
 [ 0.39324307]
 [ 0.9215883 ]
 [ 0.51486975]
 [ 0.55478686]
 [ 0.6903553 ]
 [ 0.4528617 ]
 [ 0.94779176]
 [ 0.77204168]
 [ 0.55953103]
 [ 0.81935465]
 [ 0.3816137 ]
 [ 0.62367249]
 [ 0.81768221]
 [ 0.54464006]
 [ 0.54078573]
 [ 0.36304206]
 [ 0.74558794]
 [ 0.13697581]
 [ 0.45230141]
 [ 0.10765711]
 [ 0.63784093]
 [ 0.91179621]
 [ 0.7