# Logistic Classifier

> ### Pass, Fail 를 정해야 하는 상황에서 주로 사용  
> ex)
> - 이메일에서 스팸 메일(1)과 일반 메일(0)을 구분할 때  
> - 페이스북에서 흥미 있을만한 피드(1)와 흥미 없을만한 피드(0)  
> - 카드를 사용할 때 평소에 주로 사용되는 패턴(0)인지 아닌지(1)

## sigmoid

![](https://upload.wikimedia.org/wikipedia/commons/thumb/8/88/Logistic-curve.svg/320px-Logistic-curve.svg.png)

## $
g(z) = \frac{1}{(1 + e^{-z})}
$

## Logistic Hypothesis
## $
H(X) = \frac{1}{1 + e^{-W^{T}X}}
$

## New Cost function for logistic
$
cost(W) = \frac{1}{m}\sum_{i=1}^{m}c(H(x),y)
$  

$
c(H(x),y) = \left\{\begin{array}{ll} -log(H(x)) & : y = 1 \\ -log(1 - H(x)) & : y = 0 \end{array}\right.
$

y == 1:  
- H(x) = 1 -> -log(H(x)) = 0  
- H(x) = 0 -> -log(H(x)) = infinity

y == 0:  
- H(x) = 0 -> -log(1 - H(x)) = 0  
- H(x) = 1 -> -log(1 - H(x)) = infinity


$
c(H(x),y) = -ylog(H(x))-(1-y)log(1 - H(x))
$


$
cost(W) = -\frac{1}{m}\sum_{i=1}^{m}\left[ylog(H(x))+(1-y)log(1 - H(x))\right]
$

```python
# Binary cross-entropy averaged over the batch: reduce_mean supplies the 1/m
# factor (a reduce_sum wrapped in reduce_mean would only average a scalar).
cost = -tf.reduce_mean(Y*tf.log(hypothesis) + (1-Y)*tf.log(1-hypothesis))
```


## Minimize

$
W := W - \alpha\frac{\partial}{\partial W}cost(W)
$

```python
# Step size for gradient descent, held in a TensorFlow variable.
a = tf.Variable(0.1)
# Build the optimizer first, then derive the training op that minimizes the cost.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=a)
train = optimizer.minimize(cost)
```

In [51]:
import tensorflow as tf

## Initialize Variables

In [52]:
# Training inputs: six samples with two features each.
x_data = [
    [1, 2],
    [2, 3],
    [3, 1],
    [4, 3],
    [5, 3],
    [6, 2],
]
# Binary labels, one single-element row per sample in x_data.
y_data = [
    [0],
    [0],
    [0],
    [1],
    [1],
    [1],
]

In [53]:
# Input placeholder: any number of rows, two float32 features per row.
X = tf.placeholder(
    tf.float32,
    shape=[None, 2],
)
# Label placeholder: any number of rows, one float32 value per row.
Y = tf.placeholder(
    tf.float32,
    shape=[None, 1],
)

In [54]:
# Weight matrix mapping 2 features to 1 output, randomly initialized.
W = tf.Variable(tf.random_normal([2, 1]), name='weight')
# Bias term; the name is given to the Variable itself (as for W), not to the
# random_normal initializer op.
b = tf.Variable(tf.random_normal([1]), name='bias')

## Hypothesis

## $$
g(z) = \frac{1}{(1 + e^{-z})}
$$

In [55]:
# Logistic hypothesis: sigmoid applied to the linear combination X.W + b.
hypothesis = tf.sigmoid(
    tf.matmul(X, W) + b
)

## Cost

## $$
cost(W) = -\frac{1}{m}\sum_{i=1}^{m}\left[ylog(H(x))+(1-y)log(1 - H(x))\right]
$$

In [56]:
# Binary cross-entropy, averaged over all samples fed through X and Y.
cost = -tf.reduce_mean(
    Y * tf.log(hypothesis)
    + (1 - Y) * tf.log(1 - hypothesis)
)

## Minimize

## $
W := W - \alpha\frac{\partial}{\partial W}cost(W)
$


In [57]:
# Training op: plain gradient descent on the cost with a fixed step size of 0.01.
train = (
    tf.train.GradientDescentOptimizer(learning_rate=0.01)
    .minimize(cost)
)

## Accuracy computation
True if hypothesis > 0.5 else False

In [58]:
# Threshold the hypothesis at 0.5 and express the boolean result as 1.0 / 0.0.
predicted = tf.cast(
    hypothesis > 0.5,
    dtype=tf.float32,
)
# Accuracy is the mean of the element-wise matches between prediction and label.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(predicted, Y), dtype=tf.float32)
)

## Launch graph

In [77]:
with tf.Session() as sess:
    # Variables must be initialized before any op that reads them is run.
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        # Run one training step and fetch the cost alongside it for logging.
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, cost_val)

    # Evaluate on the same data that was used for training.
    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    # `c` holds the thresholded predictions (not the ground-truth labels),
    # so the printed heading says "Predicted".
    print("\nHypothesis: \n", h, "\nPredicted: \n", c, "\nAccuracy: \n", a)

0 1.49585
200 0.791276
400 0.660343
600 0.62799
800 0.609996
1000 0.595703
1200 0.583445
1400 0.572775
1600 0.563442
1800 0.555251
2000 0.548038
2200 0.541665
2400 0.536015
2600 0.530987
2800 0.526497
3000 0.522474
3200 0.518858
3400 0.515597
3600 0.512647
3800 0.50997
4000 0.507535
4200 0.505313
4400 0.503281
4600 0.501418
4800 0.499706
5000 0.49813
5200 0.496675
5400 0.49533
5600 0.494083
5800 0.492927
6000 0.491852
6200 0.490851
6400 0.489917
6600 0.489046
6800 0.488231
7000 0.487467
7200 0.486752
7400 0.48608
7600 0.485449
7800 0.484855
8000 0.484296
8200 0.483769
8400 0.483272
8600 0.482802
8800 0.482358
9000 0.481938
9200 0.48154
9400 0.481163
9600 0.480806
9800 0.480467
10000 0.480145

Hypothesis: 
 [[ 0.41688105]
 [ 0.92780554]
 [ 0.23023723]
 [ 0.94172376]
 [ 0.18945585]
 [ 0.76685083]
 [ 0.93775189]
 [ 0.58884984]
 [ 0.26096523]
 [ 0.53974587]
 [ 0.71116656]
 [ 0.17219089]
 [ 0.28684005]
 [ 0.29494804]
 [ 0.75023347]
 [ 0.45227915]
 [ 0.73633605]
 [ 0.85055715]
 [ 0.81413311]

# Classifying diabetes

In [68]:
import tensorflow as tf
import numpy as np

In [69]:
# Read the comma-separated diabetes data as a float32 matrix.
xy = np.loadtxt(
    'data/data-03-diabetes.csv',
    delimiter=',',
    dtype=np.float32,
)

# Every column except the last goes to x_data; the last column is kept
# two-dimensional (one column) as y_data.
x_data = xy[:, :-1]
y_data = xy[:, [-1]]

In [70]:
# Input placeholder: any number of rows, eight float32 features per row.
X = tf.placeholder(
    tf.float32,
    shape=[None, 8],
)
# Label placeholder: any number of rows, one float32 value per row.
Y = tf.placeholder(
    tf.float32,
    shape=[None, 1],
)

In [71]:
# Weight matrix mapping 8 features to 1 output, randomly initialized.
W = tf.Variable(tf.random_normal([8, 1]), name='weight')
# Bias term; the name is given to the Variable itself (as for W), not to the
# random_normal initializer op.
b = tf.Variable(tf.random_normal([1]), name='bias')

In [72]:
# Logistic hypothesis: sigmoid applied to the linear combination X.W + b.
hypothesis = tf.sigmoid(
    tf.matmul(X, W) + b
)

In [73]:
# Binary cross-entropy, averaged over all samples fed through X and Y.
cost = -tf.reduce_mean(
    Y * tf.log(hypothesis)
    + (1 - Y) * tf.log(1 - hypothesis)
)

In [74]:
# Training op: plain gradient descent on the cost with a fixed step size of 0.01.
train = (
    tf.train.GradientDescentOptimizer(learning_rate=0.01)
    .minimize(cost)
)

In [75]:
# Threshold the hypothesis at 0.5 and express the boolean result as 1.0 / 0.0.
predicted = tf.cast(
    hypothesis > 0.5,
    dtype=tf.float32,
)
# Accuracy is the mean of the element-wise matches between prediction and label.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(predicted, Y), dtype=tf.float32)
)

In [76]:
with tf.Session() as sess:
    # Variables must be initialized before any op that reads them is run.
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        # Run one training step and fetch the cost alongside it for logging.
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, cost_val)

    # Evaluate on the same data that was used for training.
    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    # `c` holds the thresholded predictions (not the ground-truth labels),
    # so the printed heading says "Predicted".
    print("\nHypothesis: \n", h, "\nPredicted: \n", c, "\nAccuracy: \n", a)

0 0.628972
200 0.604527
400 0.592711
600 0.583717
800 0.575783
1000 0.568538
1200 0.561873
1400 0.55573
1600 0.550063
1800 0.544833
2000 0.540003
2200 0.535541
2400 0.531415
2600 0.527597
2800 0.524062
3000 0.520787
3200 0.51775
3400 0.514931
3600 0.512313
3800 0.50988
4000 0.507617
4200 0.50551
4400 0.503547
4600 0.501717
4800 0.50001
5000 0.498416
5200 0.496926
5400 0.495533
5600 0.494229
5800 0.493009
6000 0.491865
6200 0.490792
6400 0.489786
6600 0.488841
6800 0.487953
7000 0.487118
7200 0.486334
7400 0.485595
7600 0.484899
7800 0.484243
8000 0.483625
8200 0.483042
8400 0.482491
8600 0.481972
8800 0.481481
9000 0.481017
9200 0.480578
9400 0.480163
9600 0.479769
9800 0.479397
10000 0.479045

Hypothesis: 
 [[ 0.38338163]
 [ 0.93063539]
 [ 0.19092147]
 [ 0.94940913]
 [ 0.10516249]
 [ 0.7806353 ]
 [ 0.92639726]
 [ 0.49094453]
 [ 0.22233228]
 [ 0.61301565]
 [ 0.77007383]
 [ 0.13922971]
 [ 0.2986398 ]
 [ 0.19893201]
 [ 0.7044872 ]
 [ 0.46593416]
 [ 0.73689806]
 [ 0.7357102 ]
 [ 0.8349548