# Linear Regression

* 선형 회귀는 종속 변수 y와 한 개 이상의 독립 변수(또는 설명 변수) X 사이의 선형 상관 관계를 모델링하는 회귀분석 기법이다. 한 개의 설명 변수에 기반한 경우에는 단순 선형 회귀, 둘 이상의 설명 변수에 기반한 경우에는 다중 선형 회귀라고 한다. [참고: 위키피디아](https://ko.wikipedia.org/wiki/선형_회귀)

$$y_{\textrm{pred}} = \boldsymbol{W}^{\top}\boldsymbol{x} + b$$

* $\boldsymbol{x} = [x_{1}, x_{2}, \cdots, x_{d}]$
* $\boldsymbol{W} = [w_{1}, w_{2}, \cdots, w_{d}]$
* Loss function: $\mathcal{L} = \sum_{i=1}^{N} (y_{\textrm{pred}}^{(i)} - y^{(i)})^{2}$

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Session configuration: allow_growth=True is passed through GPUOptions so
# the session is created with on-demand GPU memory growth enabled.
gpu_options = tf.GPUOptions(allow_growth=True)
sess_config = tf.ConfigProto(gpu_options=gpu_options)

# Use one fixed seed for both TensorFlow and NumPy so that repeated runs of
# the notebook draw the same random numbers.
SEED = 219
tf.set_random_seed(SEED)
np.random.seed(SEED)

## Phase 1. Build a model

### Make data

In [None]:
# Ground-truth cubic used to synthesize the data:
#   y = a*x^3 + b*x^2 + c*x + d + noise
a, b, c, d = 1, -4, 2, -1
num_samples = 200

# Draw the inputs first and the noise second so the random stream is
# consumed in a fixed order.
data_X = np.random.uniform(low=-1, high=4, size=num_samples)
noise = np.random.normal(loc=0, scale=1, size=num_samples)

# Noise-free polynomial value at every sample, then the noisy target.
clean_y = a * data_X**3 + b * data_X**2 + c * data_X + d
data_y = clean_y + noise

# Scatter the noisy samples as red dots.
plt.plot(data_X, data_y, 'ro')

# Draw thin black reference lines through the origin: horizontal first,
# then vertical.
for draw_axis_line in (plt.axhline, plt.axvline):
    draw_axis_line(0, color='black', lw=1)

plt.show()

### Create placeholders for inputs and labels

In [None]:
# X: feature vector of a single sample -- three float32 values.
X = tf.placeholder(dtype=tf.float32, shape=[3], name='X')
# y: label of that sample; no shape is declared for it.
y = tf.placeholder(dtype=tf.float32, name='y')

### Create weight and bias

In [None]:
# Trainable parameters, both initialized from tf.random_normal:
# three weights (one per entry of X) and a one-element bias.
# NOTE: this rebinds the name `b`, which the data cell used for the
# quadratic coefficient of the ground-truth polynomial.
W = tf.Variable(initial_value=tf.random_normal(shape=[3]), name="weights")
b = tf.Variable(initial_value=tf.random_normal(shape=[1]), name="bias")

### Build a model: $y_{\textrm{pred}} = \boldsymbol{W}^{\top}\boldsymbol{x} + b$

In [None]:
# Dot product of weights and features, written as an element-wise product
# followed by a sum, then shifted by the bias.
weighted_features = W * X
y_pred = tf.reduce_sum(weighted_features) + b

### Define loss function

In [None]:
# Squared error between the prediction and the label for the fed sample.
residual = y_pred - y
loss = tf.square(residual, name="loss")

### Create an optimizer

In [None]:
# Plain gradient descent; running train_op applies one update step that
# minimizes `loss`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
train_op = optimizer.minimize(loss)

## Phase 2. Train a model

### Train a model

In [None]:
with tf.Session(config=sess_config) as sess:
    # Initialize all variables before any training step runs.
    sess.run(tf.global_variables_initializer())

    # Dump the graph so it can be inspected in TensorBoard.
    writer = tf.summary.FileWriter('graphs/code09_linear_reg_3', sess.graph)
    try:
        # Train the model with one gradient step per sample, visiting the
        # samples in a fresh random order every epoch.
        max_epoch = 100
        num_samples = len(data_X)
        for epoch in range(max_epoch + 1):
            total_loss = 0.0
            for i in np.random.permutation(num_samples):
                x_ = data_X[i]
                y_ = data_y[i]
                # Cubic polynomial features, ordered to match W: [x^3, x^2, x].
                feed_X = [x_**3, x_**2, x_]
                _, loss_ = sess.run([train_op, loss],
                                    feed_dict={X: feed_X, y: y_})
                # The bias has shape [1], so loss_ comes back as a
                # one-element array; accumulate it as a plain Python float
                # so total_loss stays a scalar.
                total_loss += np.asarray(loss_).item()
            # Average per-sample loss over the epoch.
            total_loss /= num_samples
            if epoch % 10 == 0:
                print('Epoch %d: total_loss: %f' % (epoch, total_loss))
    finally:
        # Close the event file even if training is interrupted or fails.
        writer.close()

    # Fetch the learned parameters as NumPy arrays for the following cells.
    W_, b_ = sess.run([W, b])

### Print the results: W and b

In [None]:
# Ground truth used to generate the data: a = 1, b = -4, c = 2, d = -1.
# The features are fed as [x^3, x^2, x], so W_ is expected to approach
# [a, b, c] and b_ is expected to approach d.
learned_weights, learned_bias = W_, b_
print(learned_weights, learned_bias)

### Plot the results

In [None]:
# Noisy training samples.
plt.plot(data_X, data_y, 'ro', label='Real data')

# Evaluate the fitted cubic on a sorted copy of the inputs so the curve is
# drawn left to right; data_X itself is left untouched so it stays paired
# with data_y.
sorted_X = np.sort(data_X)
# Weights follow the feature order fed during training, [x^3, x^2, x], and
# the intercept is the learned bias b_.
fitted_y = (W_[0] * sorted_X**3
            + W_[1] * sorted_X**2
            + W_[2] * sorted_X
            + b_)
plt.plot(sorted_X, fitted_y, lw=5, label='model')

# Thin black reference lines through the origin.
plt.axhline(0, color='black', lw=1)
plt.axvline(0, color='black', lw=1)
plt.legend()
plt.show()

## 직접 실습

* 여러 가지 hyper-parameter들을 바꿔 가면서 accuracy를 높여 보자