# Linear Regression

## References

* [Data-Science-from-Scratch](https://github.com/insightbook/Data-Science-from-Scratch/blob/master/code-python3/simple_linear_regression.py)
* [모두를 위한 머신러닝/딥러닝 강의](https://hunkim.github.io/ml/)
  - [lab-02-2-linear_regression_feed.py](https://github.com/hunkim/DeepLearningZeroToAll/blob/master/lab-02-2-linear_regression_feed.py)
  - [lab-04-2-multi_variable_matmul_linear_regression.py](https://github.com/hunkim/DeepLearningZeroToAll/blob/master/lab-04-2-multi_variable_matmul_linear_regression.py)

## Simple Linear Regression

In [1]:
from collections import Counter, defaultdict
from functools import partial, reduce
import lib.stats as st
import math, random
import numpy as np

In [2]:
def predict( w, b, x_i ):
    """
    Return the linear model's prediction for a single input.

    w   : [w]eight (slope) applied to the input
    b   : [b]ias (intercept) added to the weighted input
    x_i : the input value to predict for
    """
    # Must use the x_i parameter; any other name is undefined in this scope.
    return x_i * w + b

def error(w, b, x_i, y_i):
    """Return the residual: the observed y_i minus the prediction for x_i."""
    predicted = predict(w, b, x_i)
    return y_i - predicted

def sum_of_squared_error(w, b, X, Y):
    """Return the sum of squared residuals over the paired samples of X and Y."""
    total = 0
    # zip() stops at the shorter of the two sequences.
    for x_i, y_i in zip(X, Y):
        residual = error(w, b, x_i, y_i)
        total += residual ** 2
    return total

def least_squares_fit(x, y):
    """
    Given training values x and y, return the pair (w, b).

    w is correlation(x, y) * standard_deviation(y) / standard_deviation(x),
    and b is mean(y) - w * mean(x), all computed with the lib.stats helpers.
    """
    slope = st.correlation(x, y) * st.standard_deviation(y) / st.standard_deviation(x)
    intercept = st.mean(y) - slope * st.mean(x)
    return slope, intercept

def total_sum_of_squares(y):
    """Return the sum of the squares of the values yielded by st.deviation(y)."""
    # NOTE(review): st.deviation presumably returns each value's deviation
    # from the mean of y - confirm against lib.stats.
    total = 0
    for v in st.deviation(y):
        total += v ** 2
    return total

def r_squared( w, b, x, y ):
    """
    Return 1 minus the ratio of the model's sum of squared errors
    to the total sum of squares of y.

    w, b : weight and bias of the fitted line
    x, y : the paired samples the line is evaluated against
    """
    # Call the helpers by the names they are actually defined under in this file.
    return 1.0 - ( sum_of_squared_error( w, b, x, y ) / total_sum_of_squares( y ) )

def squared_error(x_i, y_i, theta):
    """Return the squared residual for one sample, with theta given as (w, b)."""
    w, b = theta
    residual = error(w, b, x_i, y_i)
    return residual ** 2

def squared_error_gradient( x_i, y_i, theta ):
    """
    Return the gradient of the squared error for one sample with respect
    to theta = (w, b).

    The components are ordered to match theta: the partial derivative with
    respect to w first, then the partial derivative with respect to b, so
    the result can be combined element-wise with theta.
    """
    w, b = theta
    # Residual y_i - prediction; computed once and reused for both partials.
    residual = error( w, b, x_i, y_i )
    return [  -2 * residual * x_i,   # d/dw
              -2 * residual ]        # d/db


In [3]:
x_train = [49,41,40,25,21,21,19,19,18,18,16,15,15,15,15,14,14,13,13,13,13,12,12,11,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,8,8,8,8,8,8,8,8,8,8,8,8,8,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
y_train = [68.77,51.25,52.08,38.36,44.54,57.13,51.4,41.42,31.22,34.76,54.01,38.79,47.59,49.1,27.66,41.03,36.73,48.65,28.12,46.62,35.57,32.98,35,26.07,23.77,39.73,40.57,31.65,31.21,36.32,20.45,21.93,26.02,27.34,23.49,46.94,30.5,33.8,24.23,21.4,27.94,32.24,40.57,25.07,19.42,22.39,18.42,46.96,23.72,26.41,26.97,36.76,40.32,35.02,29.47,30.2,31,38.11,38.18,36.31,21.03,30.86,36.07,28.66,29.08,37.28,15.28,24.17,22.31,30.17,25.53,19.85,35.37,44.6,17.23,13.47,26.33,35.02,32.09,24.81,19.33,28.77,24.26,31.98,25.73,24.86,16.28,34.51,15.23,39.72,40.8,26.06,35.76,34.76,16.13,44.04,18.03,19.65,32.62,35.59,39.43,14.18,35.24,40.13,41.82,35.45,36.07,43.67,24.61,20.9,21.9,18.79,27.61,27.21,26.61,29.77,20.59,27.53,13.82,33.2,25,33.1,36.65,18.63,14.87,22.2,36.81,25.53,24.62,26.25,18.21,28.08,19.42,29.79,32.8,35.99,28.32,27.79,35.88,29.06,36.28,14.1,36.63,37.49,26.9,18.58,38.48,24.48,18.95,33.55,14.24,29.04,32.51,25.63,22.22,19,32.73,15.16,13.9,27.2,32.01,29.27,33,13.74,20.42,27.32,18.23,35.35,28.48,9.08,24.62,20.12,35.26,19.92,31.02,16.49,12.16,30.7,31.22,34.65,13.13,27.51,33.2,31.57,14.1,33.42,17.44,10.12,24.42,9.82,23.39,30.93,15.03,21.67,31.09,33.29,22.61,26.89,23.48,8.38,27.81,32.35,23.84]


In [4]:
# Compute the least-squares weight and bias for the training data.
w, b = least_squares_fit( x_train, y_train )


In [5]:
# Show the fitted weight and bias.
print( w, b )

0.9038659456058656 22.947552413469026


## Linear Regression via Gradient Descent (TensorFlow)

### Single-variable

In [6]:
import tensorflow as tf

In [7]:
# Build the single-variable model: hypothesis = x * W + b, trained by
# gradient descent on the mean of the squared differences from y.
tf.set_random_seed(777)  # for reproducibility

# Alternative toy training data (disabled):
# x_train = [1, 2, 3, 4]
# y_train = [0, -1, -2, -3]

# Model parameters, each a one-element variable initialised from a random normal.
# NOTE: this rebinds `b`, which previously held the bias from least_squares_fit.
W = tf.Variable( tf.random_normal([1]), name="weight" )
b = tf.Variable( tf.random_normal([1]), name="bias" )

# Model input and output; values are supplied through feed_dict at run time.
x = tf.placeholder( tf.float32 )
y = tf.placeholder( tf.float32 )

# Linear hypothesis and its cost: mean of the squared (hypothesis - y).
hypothesis = x * W + b
cost = tf.reduce_mean( tf.square( hypothesis - y ) )
# Running `train` applies one gradient-descent update that reduces `cost`.
optimizer = tf.train.GradientDescentOptimizer( learning_rate=0.01 )
train = optimizer.minimize(cost)

In [8]:
x_train = [49,41,40,25,21,21,19,19,18,18,16,15,15,15,15,14,14,13,13,13,13,12,12,11,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,8,8,8,8,8,8,8,8,8,8,8,8,8,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
y_train = [68.77,51.25,52.08,38.36,44.54,57.13,51.4,41.42,31.22,34.76,54.01,38.79,47.59,49.1,27.66,41.03,36.73,48.65,28.12,46.62,35.57,32.98,35,26.07,23.77,39.73,40.57,31.65,31.21,36.32,20.45,21.93,26.02,27.34,23.49,46.94,30.5,33.8,24.23,21.4,27.94,32.24,40.57,25.07,19.42,22.39,18.42,46.96,23.72,26.41,26.97,36.76,40.32,35.02,29.47,30.2,31,38.11,38.18,36.31,21.03,30.86,36.07,28.66,29.08,37.28,15.28,24.17,22.31,30.17,25.53,19.85,35.37,44.6,17.23,13.47,26.33,35.02,32.09,24.81,19.33,28.77,24.26,31.98,25.73,24.86,16.28,34.51,15.23,39.72,40.8,26.06,35.76,34.76,16.13,44.04,18.03,19.65,32.62,35.59,39.43,14.18,35.24,40.13,41.82,35.45,36.07,43.67,24.61,20.9,21.9,18.79,27.61,27.21,26.61,29.77,20.59,27.53,13.82,33.2,25,33.1,36.65,18.63,14.87,22.2,36.81,25.53,24.62,26.25,18.21,28.08,19.42,29.79,32.8,35.99,28.32,27.79,35.88,29.06,36.28,14.1,36.63,37.49,26.9,18.58,38.48,24.48,18.95,33.55,14.24,29.04,32.51,25.63,22.22,19,32.73,15.16,13.9,27.2,32.01,29.27,33,13.74,20.42,27.32,18.23,35.35,28.48,9.08,24.62,20.12,35.26,19.92,31.02,16.49,12.16,30.7,31.22,34.65,13.13,27.51,33.2,31.57,14.1,33.42,17.44,10.12,24.42,9.82,23.39,30.93,15.03,21.67,31.09,33.29,22.61,26.89,23.48,8.38,27.81,32.35,23.84]


In [9]:
# Run 1000 training steps on the full data set, then report the learned
# weight, bias and the cost they produce on that same data.
training_feed = {x: x_train, y: y_train}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(1000):
        sess.run(train, feed_dict=training_feed)

    fetched = sess.run([W, b, cost], feed_dict=training_feed)
    W_val, b_val, cost_val = fetched
    print(f"W: {W_val} b: {b_val} cost: {cost_val}")

W: [0.9040943] b: [22.944702] cost: 65.00798034667969


In [10]:
# prediction
# Evaluate the hypothesis for x = 22 using the trained values, which are fed
# in place of the W and b variables. The session is opened in a `with` block
# so it is closed once the value has been computed.
with tf.Session() as sess:
    prediction = sess.run( hypothesis, feed_dict={ x: 22, W: W_val, b: b_val } )
# Leave the result as the last expression so the notebook still displays it.
prediction

array([42.834778], dtype=float32)

### Multi-variable

In [11]:
# placeholders for a tensor that will be always fed.
# Build the multi-variable model: hypothesis = matmul(X, W) + b.
# NOTE: this rebinds W, b, hypothesis, cost, optimizer and train from the
# single-variable section above.

# Placeholders that are always fed: X has 3 columns per row and Y has 1;
# the number of rows is left unspecified (None).
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# A 3x1 weight matrix and a one-element bias, initialised from a random normal.
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
hypothesis = tf.matmul(X, W) + b

# Cost/loss: mean of the squared (hypothesis - Y)
cost = tf.reduce_mean( tf.square( hypothesis - Y ) )

# Minimize the cost by gradient descent with learning rate 1e-5
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)


In [12]:
# Training inputs: five rows of three values each, fed to the X placeholder.
x_train = [[73., 80., 75.],
          [93., 88., 93.],
          [89., 91., 90.],
          [96., 98., 100.],
          [73., 66., 70.]]
# Training targets: one value per input row, fed to the Y placeholder.
y_train = [[152.],
          [185.],
          [180.],
          [196.],
          [142.]]

In [13]:
# Run 2000 training steps on the full data set, then report the learned
# weights, bias and the cost they produce on that same data.
training_feed = {X: x_train, Y: y_train}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(2000):
        sess.run(train, feed_dict=training_feed)

    fetched = sess.run([W, b, cost], feed_dict=training_feed)
    W_val, b_val, cost_val = fetched
    print(f"W: {W_val} \nb: {b_val} \ncost: {cost_val}")


W: [[0.11135243]
 [1.1548752 ]
 [0.7201789 ]] 
b: [2.0958202] 
cost: 14.198282241821289


In [14]:
# prediction
# Evaluate the hypothesis for one new row using the trained values, which are
# fed in place of the W and b variables. The session is opened in a `with`
# block so it is closed once the value has been computed.
with tf.Session() as sess:
    prediction = sess.run( hypothesis, feed_dict={ X: [[54., 44., 32.]], W: W_val, b: b_val } )
# Leave the result as the last expression so the notebook still displays it.
prediction

array([[81.96908]], dtype=float32)