In [32]:
import tensorflow as tf


In [2]:
# Linear regression on the California housing dataset

In [33]:
import numpy as np

In [34]:
from sklearn.datasets import fetch_california_housing

In [35]:
# Load the California housing dataset through scikit-learn; later cells read
# its `data` (features) and `target` attributes.
housing = fetch_california_housing()

In [36]:
# Unpack the two dimensions of the feature matrix: m sizes the bias column and
# scales the manual gradient, n + 1 sizes the parameter vector further down.
m, n = housing.data.shape

In [37]:
# Prepend a column of ones to the features so the first parameter acts as the
# bias (intercept) term.
bias_column = np.ones((m, 1))
housing_data_with_bias = np.hstack([bias_column, housing.data])

In [38]:
# Design matrix (bias column included) as a float32 graph constant named "X".
X = tf.constant(
    housing_data_with_bias,
    dtype=tf.float32,
    name="X",
)

In [39]:
# Display the raw target array (one value per sample) for a quick look.
housing.target

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

In [40]:
# Turn the 1-D target array into a single-column 2-D array so it lines up with
# the matrix products used below.
y_target = np.reshape(housing.target, (-1, 1))

In [41]:
# Column-vector targets as a float32 graph constant named "y".
y = tf.constant(y_target, dtype=tf.float32, name="y")

In [42]:
# Transpose of the design matrix, used twice in the normal equation.
XT = tf.transpose(X)

In [43]:
# Solve for the parameters in closed form using the normal equation: theta = (X^T X)^-1 X^T y

In [44]:
# Normal equation, theta = (X^T X)^-1 X^T y, built step by step.
# X and y are float32 constants, so the whole computation runs in float32.
gram_matrix = tf.matmul(XT, X)
pseudo_inverse = tf.matmul(tf.matrix_inverse(gram_matrix), XT)
theta = tf.matmul(pseudo_inverse, y)

In [45]:
# The graph is only evaluated inside a session; fetch theta as a NumPy array.
with tf.Session() as sess:
    theta_value = sess.run(theta)

In [46]:
# Show the parameters computed by the TensorFlow normal-equation graph.
theta_value

array([[-3.7185181e+01],
       [ 4.3633747e-01],
       [ 9.3952334e-03],
       [-1.0711310e-01],
       [ 6.4479220e-01],
       [-4.0338000e-06],
       [-3.7813708e-03],
       [-4.2348403e-01],
       [-4.3721911e-01]], dtype=float32)

In [47]:
# Verify the result using NumPy and scikit-learn

In [48]:
# Rebind X to the NumPy design matrix for the NumPy cross-check.
# NOTE: this shadows the TensorFlow constant previously bound to X.
X = housing_data_with_bias

In [49]:
# Rebind y to the NumPy column-vector targets for the NumPy cross-check.
# NOTE: this shadows the TensorFlow constant previously bound to y.
y = y_target

In [50]:
# Same normal equation as the TensorFlow version, evaluated with NumPy on the
# original (non-float32) arrays: theta = (X^T X)^-1 X^T y.
gram_inverse = np.linalg.inv(np.dot(X.T, X))
theta_np = np.dot(np.dot(gram_inverse, X.T), y)

In [51]:
# Display the NumPy solution for comparison with theta_value.
theta_np

array([[-3.69419202e+01],
       [ 4.36693293e-01],
       [ 9.43577803e-03],
       [-1.07322041e-01],
       [ 6.45065694e-01],
       [-3.97638942e-06],
       [-3.78654266e-03],
       [-4.21314378e-01],
       [-4.34513755e-01]])

In [52]:
from sklearn.linear_model import LinearRegression

In [53]:
# scikit-learn linear regression estimator with its default settings.
lin_reg = LinearRegression()

In [54]:
# Fit on the raw features (no bias column): the estimator exposes the intercept
# separately via `intercept_`, which the next cell stacks on top of `coef_`.
lin_reg.fit(housing.data, y_target)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [71]:
# Stack intercept and coefficients into one column vector laid out like
# theta_np (bias first) so the two results can be compared by eye.
intercept_column = lin_reg.intercept_.reshape(-1, 1)
np.vstack([intercept_column, lin_reg.coef_.T])

array([[-3.69419202e+01],
       [ 4.36693293e-01],
       [ 9.43577803e-03],
       [-1.07322041e-01],
       [ 6.45065694e-01],
       [-3.97638942e-06],
       [-3.78654265e-03],
       [-4.21314378e-01],
       [-4.34513755e-01]])

## Gradient descent with manually computed gradients

In [73]:
from sklearn.preprocessing import StandardScaler

In [74]:
# Scaler used to standardize the features before running gradient descent.
scaler = StandardScaler()

In [75]:
# Fit the scaler on the raw features and transform them in a single step.
scaled_housing_data = scaler.fit_transform(housing.data)

In [76]:
# Prepend the column of ones to the scaled features; the bias column itself is
# added after scaling, so it stays exactly 1.
ones_column = np.ones((m, 1))
scaled_housing_data_plus_bias = np.hstack([ones_column, scaled_housing_data])

In [77]:
# Number of iterations of the training loop; each iteration runs training_op once.
n_epochs = 1000

In [78]:
# Step size that multiplies the gradients in the parameter update.
learning_rate = 0.01

In [80]:
# Scaled features (with bias column) as a float32 graph constant named "X".
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")

In [81]:
# Targets reshaped to a single column, as a float32 graph constant named "y".
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

In [82]:
# Trainable parameter vector of shape (n + 1, 1): one weight per feature plus
# the bias. Initialised uniformly between -1.0 and 1.0 with a fixed op-level seed.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")

In [84]:
# Linear model predictions: the matrix product X . theta.
y_pred = tf.matmul(X, theta, name="predictions")

In [85]:
# Residuals: prediction minus target, element-wise.
error = y_pred - y

In [86]:
# Mean squared error: the mean of the squared residuals.
mse = tf.reduce_mean(tf.square(error), name="mse")

In [87]:
# Hand-derived gradient of the MSE with respect to theta: (2 / m) * X^T . error.
X_transposed = tf.transpose(X)
gradients = (2 / m) * tf.matmul(X_transposed, error)

In [88]:
# One gradient descent step: move theta against the gradient, scaled by the
# learning rate, and write the result back into the variable.
theta_next = theta - learning_rate * gradients
training_op = tf.assign(theta, theta_next)

In [89]:
# Op that initialises the graph's global variables; the training cell runs it
# first inside the session, before theta is read or updated.
init = tf.global_variables_initializer()

In [90]:
# Train with the manually derived gradients.
with tf.Session() as sess:
    # Give theta its initial value before anything reads it.
    init.run()

    for epoch in range(n_epochs):
        # Every 100th epoch, report the loss as it stands *before* this update.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", sess.run(mse))
        sess.run(training_op)

    # Pull the trained parameters out of the session as a NumPy array.
    best_theta = sess.run(theta)

Epoch 0 MSE = 2.7544262
Epoch 100 MSE = 0.632222
Epoch 200 MSE = 0.5727805
Epoch 300 MSE = 0.5585007
Epoch 400 MSE = 0.54907
Epoch 500 MSE = 0.542288
Epoch 600 MSE = 0.53737885
Epoch 700 MSE = 0.533822
Epoch 800 MSE = 0.5312425
Epoch 900 MSE = 0.5293705


In [91]:
# Display the parameters learned with the manually computed gradients.
best_theta

array([[ 2.06855226e+00],
       [ 7.74078071e-01],
       [ 1.31192386e-01],
       [-1.17845066e-01],
       [ 1.64778143e-01],
       [ 7.44081801e-04],
       [-3.91945131e-02],
       [-8.61356556e-01],
       [-8.23479712e-01]], dtype=float32)

## Using TensorFlow autodiff (automatic differentiation)

In [92]:
# Number of training-loop iterations for the autodiff run.
n_epochs = 1000

In [93]:
# Gradient descent step size for the autodiff run.
learning_rate = 0.01

In [94]:
# Rebuild the float32 design-matrix constant from the scaled features plus bias.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")

In [95]:
# Rebuild the float32 target constant as a single column.
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

In [96]:
# Fresh (n + 1, 1) parameter variable, created with the same uniform range and
# seed as in the manual-gradient section.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")

In [97]:
# Model output: matrix product of the design matrix and the parameters.
y_pred = tf.matmul(X, theta, name="predictions")

In [98]:
# Element-wise difference between predictions and targets.
error = y_pred - y

In [99]:
# Scalar loss: mean of the squared errors; this is what gets differentiated next.
mse = tf.reduce_mean(tf.square(error), name="mse")

In [100]:
# Let TensorFlow derive the gradient of mse with respect to theta instead of
# writing the formula by hand. tf.gradients returns one entry per requested
# variable, so the single-element result is unpacked directly.
(gradients,) = tf.gradients(mse, [theta])

In [101]:
# Gradient descent update, now driven by the autodiff gradients.
updated_theta = theta - learning_rate * gradients
training_op = tf.assign(theta, updated_theta)

In [102]:
# Initializer op for the graph's global variables; run first in the session below.
init = tf.global_variables_initializer()

In [103]:
# Train with the autodiff gradients.
with tf.Session() as sess:
    # Variables must be initialised before the first read of theta.
    init.run()

    for epoch in range(n_epochs):
        # Log the current loss every 100 epochs, prior to applying the update.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", sess.run(mse))
        sess.run(training_op)

    # Final parameter values as a NumPy array.
    best_theta = sess.run(theta)

Epoch 0 MSE = 2.7544262
Epoch 100 MSE = 0.632222
Epoch 200 MSE = 0.5727805
Epoch 300 MSE = 0.5585007
Epoch 400 MSE = 0.54907
Epoch 500 MSE = 0.54228795
Epoch 600 MSE = 0.5373789
Epoch 700 MSE = 0.533822
Epoch 800 MSE = 0.5312425
Epoch 900 MSE = 0.5293704


In [104]:
# Display the parameters learned with the autodiff gradients.
best_theta

array([[ 2.06855249e+00],
       [ 7.74078071e-01],
       [ 1.31192386e-01],
       [-1.17845066e-01],
       [ 1.64778143e-01],
       [ 7.44078017e-04],
       [-3.91945094e-02],
       [-8.61356676e-01],
       [-8.23479772e-01]], dtype=float32)

In [105]:
# Start this section from an empty default graph. The two sections above have
# already added their own X / y / theta / predictions / mse nodes to it; without
# a reset this third copy would pile up next to them, and the global
# initializer built further down would also re-initialise the stale theta
# variables left over from those sections.
tf.reset_default_graph()

# Hyperparameters for the optimizer-based run.
n_epochs = 1000
learning_rate = 0.01

# Inputs: scaled features (with bias column) and column-vector targets, float32.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

# Trainable (n + 1, 1) parameter vector with seeded uniform initialisation.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")

# Linear model, residuals, and the mean-squared-error loss to minimise.
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

In [111]:
# Hand both the gradient computation and the update step to a built-in
# optimizer: minimize() returns the op that performs one training step on mse.
# A different optimizer can be swapped in here, e.g. the momentum optimizer:
# optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(mse)

In [109]:
# Initializer op for the graph's global variables, created after the optimizer cell.
init = tf.global_variables_initializer()

# Train using the optimizer-provided training op.
with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        # Report the loss every 100 epochs, before this epoch's update is applied.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", sess.run(mse))
        sess.run(training_op)

    # Retrieve the trained parameters as a NumPy array.
    best_theta = sess.run(theta)

Epoch 0 MSE = 2.7544262
Epoch 100 MSE = 0.632222
Epoch 200 MSE = 0.5727805
Epoch 300 MSE = 0.5585007
Epoch 400 MSE = 0.54907
Epoch 500 MSE = 0.54228795
Epoch 600 MSE = 0.5373789
Epoch 700 MSE = 0.533822
Epoch 800 MSE = 0.5312425
Epoch 900 MSE = 0.5293704


In [110]:
# Display the parameters learned with the built-in optimizer.
best_theta

array([[ 2.06855249e+00],
       [ 7.74078071e-01],
       [ 1.31192386e-01],
       [-1.17845066e-01],
       [ 1.64778143e-01],
       [ 7.44078017e-04],
       [-3.91945094e-02],
       [-8.61356676e-01],
       [-8.23479772e-01]], dtype=float32)