# 선형회귀분석

## Boston Housing Dataset

![title](img/house_001.png)

In [10]:
# 필요 라이브러리 로드
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [11]:
# 데이터 import
from sklearn.datasets import load_boston

boston_dataset = load_boston()
boston = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)

## 데이터 준비

In [12]:
X_train = boston['LSTAT'].values.reshape([-1, 1])
y_train = boston_dataset.target.reshape([-1, 1])

In [13]:
X_train.shape

(506, 1)

In [4]:
X_train = np.transpose(X_train)
y_train = np.transpose(y_train)

In [5]:
print('X_train shape: {}'.format(X_train.shape))
print('y_train shape: {}'.format(y_train.shape))

X_train shape: (1, 506)
y_train shape: (1, 506)


In [6]:
def weight_variable(shape):
    initial = tf.zeros(shape)
    return tf.Variable(initial, name='weight')

def bias_variable(shape):
    initial = tf.zeros(shape)
    return tf.Variable(initial, name='bias')

$$Y = W^{T}X + b$$

In [7]:
# 변수 초기화
with tf.name_scope('scope_LinearRegression'):
    w = weight_variable([1, 1])
    b = bias_variable([1])

    x = tf.placeholder(tf.float32, shape=[None, 1])
    y = x * w + b
    y_t = tf.placeholder(tf.float32, shape=[None, 1])

손실함수(loss function)는 평균제곱오차(MSE, Mean Square Error)를 활용합니다.

$$MSE = \frac{1}{2m} \sum_{n=1}^{N} (t_{n} - y_{n})^2$$
$$\underset{w, b}{\operatorname{argmin}}{MSE}$$

In [8]:
with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.square(y_t - y))
    train_step = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)
    tf.summary.scalar('loss', loss) # 텐서보드에서 확인할 값

In [9]:
# 초기화
epochs = 10000
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    writer = tf.summary.FileWriter('./board/test', sess.graph)
    merged_summary = tf.summary.merge_all()
    
    # 학습과정 시각화를 위한 리스트
    w_list = []
    b_list = []

    for epoch in range(epochs):
        if (epoch)%2500 == 0: # epoch 10번마다 저장
            w_list.append(sess.run(w))
            b_list.append(sess.run(b))
            print("[Epoch: {}] w: {}, b: {}".format(epoch, sess.run(w), sess.run(b)))
        
        # tensorboard에 기록
        history, _ = sess.run([merged_summary, train_step], feed_dict={x: X_train, y_t: y_train})
        writer.add_summary(history, epoch)

[Epoch: 0] w: [[0.]], b: [0.]


ValueError: Cannot feed value of shape (1, 506) for Tensor 'scope_LinearRegression/Placeholder:0', which has shape '(?, 1)'

In [None]:
# 학습과정 시각화
def visualize(X_train, y_train, w_list, b_list, index):
    plt.subplot(2, 2, index+1)
    plt.title('{} epoch'.format(2500*(index+1)))
    plt.plot(X_train, y_train, 'ro')
    plt.plot(X_train, w_list[index][0] * X_train + b_list[index][0])
    plt.text(0.4, 0.2, 'w={}, b={}'.format(round(float(w_list[index][0]), 3), round(float(b_list[index][0]), 3)),
             fontsize=20)
    plt.grid(True)
# Setting
plt.figure(figsize=(20, 10))
plt.tight_layout(pad=0.4, w_pad=1.0, h_pad=1.0)
plt.subplots_adjust(hspace = .4)

for index in range(4):
    visualize(X_train, y_train, w_list, b_list, index)
plt.show()

In [None]:
!tensorboard --logdir=./board/test

In [None]:
# tensorboad log file 삭제
!rm -rf ./board/test

출처
* https://towardsdatascience.com/linear-regression-on-boston-housing-dataset-f409b7e4a155