In [1]:
# Optional: silence library warnings.
# import warnings
# warnings.filterwarnings('ignore')

# Optional: TensorFlow 2 is installed, but the two lines below would switch to
# the TensorFlow 1 compatibility API. They stay disabled because this notebook
# relies on tf.GradientTape.
# import tensorflow.compat.v1 as tf
# tf.disable_v2_behavior()

import tensorflow as tf
import numpy as np

### Build hypothesis and cost

In [2]:
# Toy data set: every target equals its input, so the ideal fit is W = 1, b = 0.
x_data = [1,2,3,4,5]
y_data = [1,2,3,4,5]

# Trainable parameters, deliberately started away from the ideal values.
W = tf.Variable(2.9)
b = tf.Variable(0.5)

# Linear hypothesis H(x) = W*x + b, evaluated for all of x_data at once.
hypothesis = W*x_data + b

$$ cost(W,b) = \frac{1}{m}\sum_{i=1}^{m}\left(H(x^{(i)})-y^{(i)}\right)^2 $$

In [3]:
# Mean squared error: the average of the squared differences between hypothesis and y_data.
cost = tf.reduce_mean(tf.square(hypothesis - y_data))

In [4]:
# Display the cost tensor for the initial W and b.
cost

<tf.Tensor: shape=(), dtype=float32, numpy=45.660004>

In [5]:
# tf.reduce_mean(): computes the mean while reducing the dimensions of its input
# (the rank-1 list below becomes a scalar).
v = [1., 2., 3., 4.] # rank = 1
print(tf.reduce_mean(v))

tf.Tensor(2.5, shape=(), dtype=float32)


In [6]:
# tf.square(): squares its input
tf.square(3)

<tf.Tensor: shape=(), dtype=int32, numpy=9>

## Gradient descent : 경사 하강 알고리즘
### $cost(W,b)$를 최소화 하는 $W,b$를 찾음 

In [7]:
# Initialise the learning rate
learning_rate = 0.01

# Gradient descent: one single update step.
# The tape records what is computed from the variables inside the `with` block.
with tf.GradientTape() as tape :
    hypothesis = W * x_data + b
    cost = tf.reduce_mean(tf.square(hypothesis - y_data))

# Ask the tape for the derivatives of cost with respect to W and b
# (returned in that order: gradient for W, gradient for b).
W_grad, b_grad = tape.gradient(cost, [W, b])

# A.assign_sub(B) performs A = A - B (i.e. A -= B).
W.assign_sub(learning_rate * W_grad)  # learning_rate controls how much of the gradient is applied
b.assign_sub(learning_rate * b_grad)


<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=0.376>

In [9]:
# Repeat the single gradient-descent step from above inside a for loop.
# The parameters are reset so the run starts from the same initial values.
W = tf.Variable(2.9)
b = tf.Variable(0.5)

for i in range(100):
    # Forward pass, recorded on the tape so that cost can be differentiated.
    with tf.GradientTape() as tape:
        hypothesis = W * x_data + b
        error = hypothesis - y_data
        cost = tf.reduce_mean(tf.square(error))

    # Gradient-descent update: W first, then b.
    W_grad, b_grad = tape.gradient(cost, [W, b])
    for variable, gradient in ((W, W_grad), (b, b_grad)):
        variable.assign_sub(learning_rate * gradient)

    # Report only when i is a multiple of 10.
    if i % 10:
        continue
    print("{:5} | {:10.4}|{:10.4}|{:10.6f}".format(i, W.numpy(), b.numpy(), cost))

    0 |      2.452|     0.376| 45.660004
   10 |      1.104|  0.003398|  0.206336
   20 |      1.013|  -0.02091|  0.001026
   30 |      1.007|  -0.02184|  0.000093
   40 |      1.006|  -0.02123|  0.000083
   50 |      1.006|  -0.02053|  0.000077
   60 |      1.005|  -0.01984|  0.000072
   70 |      1.005|  -0.01918|  0.000067
   80 |      1.005|  -0.01854|  0.000063
   90 |      1.005|  -0.01793|  0.000059


In [16]:
# Full Code
import tensorflow as tf

# Data
x_data = [1, 2, 3, 4, 5]
y_data = [1, 2, 3, 4, 5]

# Initial values of W and b
W = tf.Variable(2.9)  # start from 2.9
b = tf.Variable(0.5)  # start from 0.5

learning_rate = 0.01

for i in range(101):  # steps 0 through 100 inclusive
    # Forward pass, recorded on the tape so that cost can be differentiated.
    with tf.GradientTape() as tape:
        hypothesis = W * x_data + b
        error = hypothesis - y_data
        cost = tf.reduce_mean(tf.square(error))

    # Gradient-descent update: W first, then b.
    W_grad, b_grad = tape.gradient(cost, [W, b])
    for variable, gradient in ((W, W_grad), (b, b_grad)):
        variable.assign_sub(learning_rate * gradient)

    # Report only when i is a multiple of 10.
    if i % 10:
        continue
    print("{:5} | {:10.4}|{:10.4}|{:10.6f}".format(i, W.numpy(), b.numpy(), cost))

# The closer the cost is to 0, the more closely the hypothesis matches the actual data.

    0 |      2.452|     0.376| 45.660004
   10 |      1.104|  0.003398|  0.206336
   20 |      1.013|  -0.02091|  0.001026
   30 |      1.007|  -0.02184|  0.000093
   40 |      1.006|  -0.02123|  0.000083
   50 |      1.006|  -0.02053|  0.000077
   60 |      1.005|  -0.01984|  0.000072
   70 |      1.005|  -0.01918|  0.000067
   80 |      1.005|  -0.01854|  0.000063
   90 |      1.005|  -0.01793|  0.000059
  100 |      1.005|  -0.01733|  0.000055


In [18]:
# Evaluate the hypothesis W*x + b with the current W and b at x = 5 and x = 2.5.
print(W*5 + b)
print(W*2.5 + b)

tf.Tensor(5.00667, shape=(), dtype=float32)
tf.Tensor(2.4946702, shape=(), dtype=float32)
