# Logistic regression 구현
- hypo :
- logistic function(sigmoid):
- binary cross entrophy:
- gradient descent:

# 1. 데이터 준비

In [1]:
import numpy as np

x_data = np.array([2, 4, 6, 8, 10, 12, 14, 16, 18, 20]).reshape(10,1)  #10행 1열
t_data = np.array([0, 0, 0, 0,  0,  0,  1,  1,  1,  1]).reshape(10,1)  #10행 1열

print("x_data.shape = ", x_data.shape, ", t_data.shape = ", t_data.shape)

x_data.shape =  (10, 1) , t_data.shape =  (10, 1)


# 2. 데이터 전처리, Scale, 표준화, train/test split

# 3. 모델: 가설함수 설정
- hypo :
- logistic function(sigmoid):

## 3-1. hypo:
- $ h(x) = wx + b$

In [2]:
W = np.random.rand(1,1)
b = np.random.rand(1)

In [3]:
def hypoF(X):
    return np.dot(X,W) + b

## 3-2. logistic function
- $sigmoid(z) = \frac{1}{(1+e^{-z})}$

In [4]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [5]:
z = hypoF(np.array([[150]]))

In [6]:
z

array([[25.91939187]])

In [7]:
sigmoid(z)

array([[1.]])

# 4. Loss function: binary cross entrophy
- $P(C=1 | x) = y, P(C=0 |x) = 1-y, P(C=t |x) = y^{t}*(1-y)^{1-t}$
- likelyhood : L(W, b) = $Π_{i=0}^{n} P(C=t_i | x_i)= y^{t}*(1-y)^{1-t}$ --> w와 b값을 바꾸면서 $P(C=t_i | x_i)$ 값이 가장 높게 나왔을 때가 최적의 W와 b값이다.
- negative log likely hood: $\sum {i=0}^{n} -t*log(y) - (1-t)log(1-y)$

In [8]:
def bce(X, target):
    z = np.dot(X, W) + b
    y_hat = sigmoid(z) #예측값

    return np.sum(-target*np.log(y_hat) - (1-target)*np.log(1-y_hat))

In [9]:
bce(x_data, t_data)

13.35968786729973

## 5. Gradient 구하기

In [10]:
from re import T
# loss대비, parmeter (W,b) 값의 기울기가 어떤가?
def gradient(bceF, param):
    h = 1e-5
    grads = np.zeros_like(param)

    for idx in range(param.size):
        tmp = param[idx]
        param[idx] = tmp + h
        l_f = bceF(param)

        param[idx] = tmp - h
        l_b = bceF(param)

        grads[idx] = (l_f - l_b) / (2*h)
        param[idx] = tmp

    return grads

In [11]:
bcef = lambda x : bce(x_data, t_data)

In [12]:
# grad_w = gradient(bcef, W)
# grad_b = gradient(bcef, b)

# 6. gradient update
- $W = W - \alpha\frac{\partial{Cost(W,b)}}{\partial{W}}$

In [13]:
learning_rate  = 0.001

for idx in range(10000):
    grad_w = gradient(bcef, W)
    grad_b = gradient(bcef, b)
    W = W-learning_rate*grad_w
    b = b-learning_rate*grad_b

    loss = bce(x_data, t_data)
    print("epoch: ", idx, "Loss값 : ", loss, "W값: ", W, "b값 : ", b)
    print(W)

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
epoch:  7500 Loss값 :  1.9633126843361486 W값:  [[0.41136356]] b값 :  [-5.0823961]
[[0.41136356]]
epoch:  7501 Loss값 :  1.9631924562957013 W값:  [[0.41138938]] b값 :  [-5.08274088]
[[0.41138938]]
epoch:  7502 Loss값 :  1.9630722531860598 W값:  [[0.4114152]] b값 :  [-5.08308563]
[[0.4114152]]
epoch:  7503 Loss값 :  1.9629520749985983 W값:  [[0.41144102]] b값 :  [-5.08343034]
[[0.41144102]]
epoch:  7504 Loss값 :  1.9628319217246823 W값:  [[0.41146684]] b값 :  [-5.08377502]
[[0.41146684]]
epoch:  7505 Loss값 :  1.962711793355693 W값:  [[0.41149265]] b값 :  [-5.08411966]
[[0.41149265]]
epoch:  7506 Loss값 :  1.9625916898830302 W값:  [[0.41151846]] b값 :  [-5.08446427]
[[0.41151846]]
epoch:  7507 Loss값 :  1.9624716112980731 W값:  [[0.41154426]] b값 :  [-5.08480884]
[[0.41154426]]
epoch:  7508 Loss값 :  1.962351557592197 W값:  [[0.41157007]] b값 :  [-5.08515337]
[[0.41157007]]
epoch:  7509 Loss값 :  1.9622315287568297 W값:  [[0.41159587]] b값 :  [-5.08549787]
[[0.411595

In [14]:
new_input = np.array([[13]])
z = hypoF(new_input)

In [15]:
z

array([[0.24582716]])

In [16]:
y_hat = sigmoid(z)

In [17]:
y_hat

array([[0.56114916]])