# SGD를 사용하여 2차 함수 모델 fitting하기

In [None]:
import tensorflow as tf
from helper import generate_batches
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

## 데이터 생성하기

In [None]:
# Ground-truth quadratic used to generate the training targets.
def f(x):
    """Return ``x**2 + 0.3*x + 5.0`` for a scalar or NumPy array ``x``."""
    return x**2 + 0.3 * x + 5.0

In [None]:
# 50 evenly spaced sample locations on [-1, 1] and the noise-free targets there.
x_train = np.linspace(start=-1, stop=1, num=50)
fx = f(x_train)

In [None]:
# Fix the seed so the noisy targets are reproducible across runs.
np.random.seed(320)
# One noise sample per training point, scaled by 0.3 and added to the clean curve.
noise = 0.3 * np.random.rand(len(x_train))
y_train = fx + noise

In [None]:
# Plot the noisy training samples as markers ('o') on a gridded axes.
plt.plot(x_train,y_train, 'o')
plt.grid()
plt.show()

## Quadratic Model의 다른 관점 1
$$
f(x_i,W) = Wx_i =
\begin{bmatrix}
w_0& w_1 & w_2
\end{bmatrix}
\begin{bmatrix}
x_i^2 \\ x_i \\ 1
\end{bmatrix}
$$

## Gradient Descent Method

In [None]:
# Row-vector formulation: W is (1, 3) and x is fed as a (3, N) matrix whose
# columns are [x_i**2, x_i, 1], so tf.matmul(W, x) yields one prediction per sample.
W = tf.Variable(np.array([[-1.0, -1.0, -1.0]]), dtype=tf.float32)
x = tf.placeholder(dtype=tf.float32)
y = tf.placeholder(dtype=tf.float32)

# Mean squared error between the model output and the targets.
residual = tf.matmul(W, x) - y
loss = tf.reduce_mean(tf.square(residual))

# NOTE: batch_size is assigned but not used in this cell; every update below
# feeds the whole training set (full-batch gradient descent).
batch_size = 10
lr = 0.01
MaxEpochs = 1000
optimizer = tf.train.GradientDescentOptimizer(lr)
train = optimizer.minimize(loss)

# Build one [x**2, x, 1] row per sample, then transpose to (3, N).
feature_train = np.array([[xval**2, xval, 1] for xval in x_train]).T
# The same feed is used for both logging and training, so build it once.
full_feed = {x: feature_train, y: y_train}
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(MaxEpochs):
        # Every 100 epochs, report the weights and loss before the update step.
        if epoch % 100 == 0:
            curr_w, curr_loss = sess.run([W, loss], feed_dict=full_feed)
            print(epoch, curr_w,curr_loss)
        sess.run(train, feed_dict=full_feed)

    # Fetch the final weights while the session is still open.
    w_tf_sgd1 = sess.run(W)

In [None]:
# Echo the fitted (1, 3) weight matrix as the cell output.
w_tf_sgd1

In [None]:
# Rebuild the (3, N) feature matrix: rows are x**2, x and the constant 1.
feature_train = np.array([[xval**2, xval, 1] for xval in x_train]).T
# (1, 3) weights times (3, N) features -> (1, N) predictions, made 1-D for plotting.
y_fit = np.dot(w_tf_sgd1, feature_train).ravel()
plt.plot(x_train, y_train, 'o')
plt.plot(x_train, y_fit, 'r-')
plt.grid()
plt.show()

## Quadratic Model의 다른 관점 2
$$
f(x_i,W) = x_iW  =
\begin{bmatrix}
x_i^2 & x_i & 1
\end{bmatrix}
\begin{bmatrix}
w_0 \\ w_1 \\ w_2
\end{bmatrix}
$$

In [None]:
from sklearn import linear_model

# Design matrix with one row [x**2, x, 1] per training sample.
feature_train = [[xval**2, xval, 1.0] for xval in x_train]
# The constant-1 column already models the bias term w2, so disable the
# estimator's own intercept; otherwise the bias is represented twice and
# coef_ no longer corresponds to [w0, w1, w2] of the model above.
reg = linear_model.LinearRegression(fit_intercept=False)
reg.fit(feature_train, y_train)
# With fit_intercept=False, intercept_ is 0.0 and coef_ holds all three weights.
print(reg.coef_, reg.intercept_)

In [None]:
# Overlay the scikit-learn least-squares fit (red line) on the noisy samples.
y_pred = reg.predict(feature_train)
plt.plot(x_train, y_train, 'o')
plt.plot(x_train, y_pred, 'r-')
plt.grid()
plt.show()

### Gradient Descent

In [None]:
import tensorflow as tf
from helper import generate_batches

# Column-vector formulation: x is (N, 3) with rows [x_i**2, x_i, 1] and W is (3, 1),
# so tf.matmul(x, W) yields an (N, 1) column of predictions.
W = tf.Variable(np.array([-1.0, -1.0, -1.0]).reshape(-1,1), dtype=tf.float32)
x = tf.placeholder(dtype=tf.float32, shape=(None,3))
y = tf.placeholder(dtype=tf.float32, shape=(None,1))

# Mean squared error between predictions and targets.
loss = tf.reduce_mean(tf.square(tf.matmul(x, W) - y))

lr = 0.01
MaxEpochs = 1000
optimizer = tf.train.GradientDescentOptimizer(lr)
train = optimizer.minimize(loss)

feature_train = np.array([[xval**2, xval, 1] for xval in x_train])
# Full-batch feed, built once; targets are reshaped to (N, 1) to match the
# declared shape of the y placeholder.
full_feed = {x: feature_train, y: y_train.reshape(-1, 1)}
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(MaxEpochs):
        if epoch % 100 == 0:
            # Log the current weights and loss every 100 epochs.
            curr_w, curr_loss = sess.run([W, loss], feed_dict=full_feed)
            print(epoch, curr_w.flatten(), curr_loss)
        sess.run(train, feed_dict=full_feed)
    # Fetch the final weights while the session is still open.
    w_tf_gd = sess.run(W)
print(w_tf_gd)

In [None]:
# Predictions from the gradient-descent weights, flattened to 1-D for plotting.
y_fit = np.dot(feature_train, w_tf_gd).ravel()
plt.plot(x_train, y_train, 'o')
plt.plot(x_train, y_fit, 'r-')
plt.grid()
plt.show()

###  Stochastic Gradient Descent 적용
1. 다음과 같이 Parameter를 설정
    1. `batch_size=10`
    1. `learning_rate=0.01`
    1. `w0=np.array([-1.0, -1.0, -1.0])`
    1. `MaxEpochs = 1000`
1. `np.random.shuffle()`을 이용하여 데이터 골고루 섞기
1. 아래 for loop 안에 SGD를 구현하시면 됩니다.
    ```python
    for epoch in range(MaxEpochs):
        for x_batch, y_batch in generate_batches(_, _, _):
            # do gradient descent with x_batch and y_batch
    ```

In [None]:
# Column-vector formulation: x is (batch, 3) with rows [x_i**2, x_i, 1], W is (3, 1).
W = tf.Variable(np.array([-1.0, -1.0, -1.0]).reshape(-1,1), dtype=tf.float32)
x = tf.placeholder(dtype=tf.float32, shape=(None,3))
y = tf.placeholder(dtype=tf.float32, shape=(None,1))

# Mean squared error over whatever batch is fed.
loss = tf.reduce_mean(tf.square(tf.matmul(x, W) - y))

# TODO1: hyperparameters as specified in the exercise description.
batch_size = 10
lr = 0.01
MaxEpochs = 1000

optimizer = tf.train.GradientDescentOptimizer(lr)
train = optimizer.minimize(loss)

# Build the (N, 3) feature matrix here so the cell does not depend on which
# earlier cell last assigned feature_train.
feature_train = np.array([[xval**2, xval, 1] for xval in x_train])

np.random.seed(320)
# TODO2: shuffle an index array in place and apply the same permutation to
# features and targets so each (x, y) pair stays aligned.
shuffled_id = np.arange(len(x_train))
np.random.shuffle(shuffled_id)
shuffled_x_train = feature_train[shuffled_id]
# Targets are reshaped to (N, 1) to match the declared shape of the y placeholder.
shuffled_y_train = y_train[shuffled_id].reshape(-1, 1)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(MaxEpochs):
        if epoch % 100 == 0:
            # Log weights and the loss over the full training set every 100 epochs.
            curr_w, curr_loss = sess.run([W, loss], feed_dict={x:shuffled_x_train, y:shuffled_y_train})
            print(epoch, curr_w.flatten(),curr_loss)
        # TODO3: one gradient step per mini-batch of batch_size consecutive
        # (already shuffled) samples. Batches are sliced directly here because
        # the argument order of helper.generate_batches is not visible in this
        # notebook; swap in that helper once its signature is confirmed.
        for start in range(0, len(shuffled_x_train), batch_size):
            stop = start + batch_size
            x_batch = shuffled_x_train[start:stop]
            y_batch = shuffled_y_train[start:stop]
            sess.run(train, feed_dict={x: x_batch, y: y_batch})

    # Fetch the final weights while the session is still open.
    w_tf_sgd = sess.run(W)

In [None]:
# (N, 3) feature matrix with one row [x**2, x, 1] per sample.
feature_train = np.array([[xval**2, xval, 1] for xval in x_train])
# Predictions from the SGD weights, flattened to 1-D for plotting.
y_fit = np.dot(feature_train, w_tf_sgd).ravel()
plt.plot(x_train, y_train, 'o')
plt.plot(x_train, y_fit, 'r-')
plt.grid()
plt.show()