In [1]:
import warnings
warnings.filterwarnings(action='ignore')

import matplotlib.pylab as plt
import matplotlib as mpl
import matplotlib.font_manager as fm
import seaborn as sns
import numpy as np

# Global plot styling: seaborn white grid, ASCII hyphen for minus signs
# (the Unicode minus glyph is commonly missing from CJK fonts), and a larger
# default font size.
sns.set_style("whitegrid")
mpl.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.size'] = 12

# Font file used so that Korean text renders in figures.
# NOTE(review): absolute macOS path -- this cell depends on the font being
# installed at exactly this location.
path = "/Library/Fonts/NanumGothic.otf"
font_name = fm.FontProperties(fname=path, size=20).get_name()

# Make matplotlib aware of the font file *before* selecting it by name.
# The private fm._rebuild() helper no longer exists in newer matplotlib
# releases, so use the public FontManager.addfont() when it is available and
# only fall back to the legacy cache rebuild on old versions.
if hasattr(fm.fontManager, "addfont"):
    fm.fontManager.addfont(path)
else:
    fm._rebuild()

plt.rc('font', family=font_name)

import tensorflow as tf

## Logistic Hypothesis

$$H(x) = Wx + b$$

H(x) 값을 0 ~ 1사이로 만드는 함수 g(z),

z = H(x)이고, z가 어떤 값이 되든 항상 0~1사이의 분포를 갖는다.  이것이 logistic function 또는 sigmoid function이다.

이 함수는 z 값이 커질 수록 1(g(z)=1)에 가까워지고, z 값이 작아질 수록 0(g(z)=0)에 가까워지는 함수이다.

$$ z = w^Tx $$

$$\text{logistic}(x) = H(x) = \dfrac{1}{1+\exp{(-w^Tx)}}$$

## Cost

Logistic Regression에 선형 회귀의 제곱 오차 cost를 그대로 적용하면, sigmoid 함수 때문에 cost 함수가 볼록(convex)한 형태가 되지 않고 울퉁불퉁한 곡선의 형태가 된다.

즉, cost 함수에 local minimum이 존재하게 되어, Gradient Descent Algorithm이 시작점에 따라 global minimum이 아닌 local minimum에 수렴할 수 있다. 따라서 기존의 제곱 오차 cost 함수는 여기에서는 사용할 수 없다.

결과적으로 새로운 cost 함수는 다음과 같다.

$$ C(H(x), y) = \begin{cases} -log(H(x)) & \text{ if } y = 1 \\ -log(1-H(x)) & \text{ if } y = 0 \end{cases}$$

실제 값 y = 1일 때 예측한 값 H(x)가 1에 가까워지면 cost 함수 C는 0으로 수렴하고, 실제 값 y = 0일 때도 예측한 값 H(x)가 0에 가까워지면 cost 함수 C는 0으로 수렴한다.

반대로 예측이 틀리면 cost 함수 값은 무한대로 발산하는 값을 갖는다.

위의 식을 일반화하면,
$$C(H(x), y) = -y\log(H(x))-(1-y)\log(1-H(x))$$

## Minimize cost - Gradient Descent algorithm

여기에서도 마찬가지로 Gradient Descent Algorithm을 사용할 수 있고, 

$$\text{cost}(W) = -\dfrac{1}{m}\sum \left[ y\log(H(x)) + (1 - y)\log(1-H(x)) \right]$$

weight를 업데이트 혹은 구하기 위해서는 다음과 같은 cost 함수의 미분 값이 필요하다.

$$ W:= W-\alpha \dfrac{\partial}{\partial w}\text{cost}(W)$$

위의 코스트 함수를 tensorflow로 구현한 코드는 다음과 같다.


### 예제 1

In [6]:
# Toy dataset: six samples with two features each and one binary label each.
x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]]
y_data = [[0], [0], [0], [1], [1], [1]]

# Placeholders for a batch of inputs (N, 2) and labels (N, 1)
X = tf.placeholder(tf.float32, shape=[None, 2])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Trainable parameters, randomly initialised
W = tf.Variable(tf.random_normal([2, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Model: sigmoid(XW + b)
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

# Cost/loss: mean binary cross-entropy
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

# Minimize/train with plain gradient descent
train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation
# Predict 1 when the hypothesis exceeds 0.5, otherwise 0
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
# 1 where the prediction equals the label, 0 otherwise; averaged over the batch
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Run the graph inside a context manager so the session is closed (and its
# resources released) when the cell finishes instead of leaking.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, cost_val)

    # Accuracy report, evaluated on the training data itself
    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict={X: x_data, Y: y_data})

print("\nHypothesis: ", h, "\nPrediction(Y):\n ", c, "\nAccuracy: ", a)

0 2.67585
200 0.5249104
400 0.5059943
600 0.48792645
800 0.47067872
1000 0.4542272
1200 0.438547
1400 0.42361143
1600 0.40939245
1800 0.39586127
2000 0.38298845
2200 0.37074432
2400 0.35909894
2600 0.34802306
2800 0.33748773
3000 0.32746473
3200 0.31792656
3400 0.30884734
3600 0.3002015
3800 0.29196495
4000 0.28411472
4200 0.27662888
4400 0.26948693
4600 0.26266918
4800 0.25615737
5000 0.24993402
5200 0.24398284
5400 0.23828839
5600 0.23283641
5800 0.22761329
6000 0.22260642
6200 0.21780388
6400 0.21319449
6600 0.20876785
6800 0.20451404
7000 0.20042409
7200 0.19648926
7400 0.19270158
7600 0.1890534
7800 0.18553759
8000 0.18214752
8200 0.178877
8400 0.17571999
8600 0.1726711
8800 0.16972496
9000 0.1668769
9200 0.16412215
9400 0.16145642
9600 0.15887563
9800 0.15637596
10000 0.15395372

Hypothesis:  [[0.03263738]
 [0.16133943]
 [0.31382307]
 [0.77732116]
 [0.93698716]
 [0.9793046 ]] 
Prediction(Y):
  [[0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]] 
Accuracy:  1.0


### 예제2. Classifying diabetes

In [10]:
# Read the comma-separated diabetes table as float32.
xy = np.loadtxt('data-03-diabetes.csv', dtype=np.float32, delimiter=',')
# Every column but the last is a feature; the last column is the label,
# selected with a list index so it stays 2-D.
x_data, y_data = xy[:, :-1], xy[:, [-1]]

In [11]:
# Display the shapes of the feature matrix and the label column.
(x_data.shape, y_data.shape)

((759, 8), (759, 1))

In [16]:
# Placeholders for a batch of inputs (N, 8) and labels (N, 1)
X = tf.placeholder(tf.float32, shape=[None, 8])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Trainable parameters, randomly initialised
W = tf.Variable(tf.random_normal([8, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Model: sigmoid(XW + b)
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

# Cost function: mean binary cross-entropy
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

# Minimize / train with plain gradient descent
train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation: threshold at 0.5, then average the matches with Y
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Run the graph inside a context manager so the session is closed (and its
# resources released) when the cell finishes instead of leaking.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})

        if step % 200 == 0:
            print(step, cost_val)

    # Accuracy report, evaluated on the training data itself
    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict={X: x_data, Y: y_data})

# `c` holds the thresholded predictions (not the ground-truth labels), so it
# is labelled as a prediction.
print("\nHypothesis: ", h, "\nPrediction(Y):\n ", c, "\nAccuracy: ", a)

0 1.1866201
200 0.7620891
400 0.68429416
600 0.659299
800 0.6430997
1000 0.6293665
1200 0.61701906
1400 0.6057947
1600 0.5955708
1800 0.58625275
2000 0.5777571
2200 0.5700069
2400 0.56293213
2600 0.5564691
2800 0.55055946
3000 0.54515064
3200 0.540195
3400 0.53564966
3600 0.53147566
3800 0.5276385
4000 0.5241065
4200 0.52085155
4400 0.5178485
4600 0.5150744
4800 0.5125087
5000 0.51013315
5200 0.50793093
5400 0.50588727
5600 0.50398856
5800 0.5022227
6000 0.5005785
6200 0.49904615
6400 0.4976164
6600 0.49628127
6800 0.4950332
7000 0.49386543
7200 0.4927718
7400 0.4917467
7600 0.49078494
7800 0.48988196
8000 0.4890335
8200 0.48823544
8400 0.48748434
8600 0.4867768
8800 0.48610976
9000 0.48548067
9200 0.48488688
9400 0.48432583
9600 0.48379543
9800 0.48329383
10000 0.48281896

Hypothesis:  [[0.4305743 ]
 [0.9278971 ]
 [0.18831357]
 [0.94567287]
 [0.21201408]
 [0.7395502 ]
 [0.92856   ]
 [0.52209735]
 [0.24804205]
 [0.5655194 ]
 [0.7304999 ]
 [0.15707946]
 [0.31684166]
 [0.2974897 ]
 [0.71

코스트는 1.1866201에서 0.48281896로 감소하고(코스트는 그다지 감소하지 않음) 정확도는 0.77

### 예제 3. iris 데이터 분석

In [19]:
from sklearn.datasets import load_iris

iris = load_iris()
# Keep only two of the species (target values 0 and 2).
# np.isin replaces np.in1d, which is deprecated in recent NumPy releases;
# for a 1-D target array the two return the same boolean mask.
idx = np.isin(iris.target, [0, 2])
X_data = iris.data[idx, :]
# Halving maps the labels {0, 2} to {0, 1}; the extra axis turns the label
# vector into a column so its shape matches what the analysis expects.
y_data = (iris.target[idx] / 2)[:, np.newaxis]

In [20]:
# Display the shapes of the feature matrix and the label column.
(X_data.shape, y_data.shape)

((100, 4), (100, 1))

In [21]:
# Placeholders for a batch of inputs (N, 4) and labels (N, 1)
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Trainable parameters, randomly initialised
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Model: sigmoid(XW + b)
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

# Cost function: mean binary cross-entropy
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

# Train with plain gradient descent
train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation: threshold at 0.5, then average the matches with Y
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Run the graph inside a context manager so the session is closed (and its
# resources released) when the cell finishes instead of leaking.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: X_data, Y: y_data})
        if step % 200 == 0:
            print(step, cost_val)

    # Accuracy report, evaluated on the training data itself
    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict={X: X_data, Y: y_data})

# `c` holds the thresholded predictions (not the ground-truth labels), so it
# is labelled as a prediction.
print("\nHypothesis: ", h, "\nPrediction(Y):\n ", c, "\nAccuracy: ", a)

0 2.0718272
200 0.2502231
400 0.11173405
600 0.07068116
800 0.051516306
1000 0.040495254
1200 0.033354964
1400 0.028358001
1600 0.024667066
1800 0.021830134
2000 0.01958172
2200 0.01775596
2400 0.016243894
2600 0.014971069
2800 0.01388484
3000 0.01294692
3200 0.012128824
3400 0.011408943
3600 0.010770565
3800 0.010200559
4000 0.0096884845
4200 0.009225921
4400 0.008805979
4600 0.008423042
4800 0.008072387
5000 0.007750106
5200 0.007452875
5400 0.007177868
5600 0.006922666
5800 0.0066852085
6000 0.0064637195
6200 0.0062566106
6400 0.006062523
6600 0.0058802734
6800 0.005708794
7000 0.0055471687
7200 0.005394556
7400 0.0052502197
7600 0.0051135146
7800 0.004983845
8000 0.004860667
8200 0.004743518
8400 0.004631959
8600 0.004525597
8800 0.004424073
9000 0.0043270676
9200 0.004234283
9400 0.004145438
9600 0.004060305
9800 0.003978648
10000 0.0039002555

Hypothesis:  [[2.3497343e-03]
 [4.9629211e-03]
 [3.8596392e-03]
 [8.9580119e-03]
 [2.4182200e-03]
 [2.9778481e-03]
 [5.1164329e-03]
 [4.01

코스트 함수가 거의 제로로 수렴하였고, 정확도도 1.0임