## Linear Regression with TensorFlow

In [11]:
import numpy as np
from sklearn.datasets import fetch_california_housing
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

In [12]:
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [13]:
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session() as sess:
    theta_value = theta.eval()

The main benefit of this code versus computing the Normal Equation directly using
NumPy is that TensorFlow will automatically run this on your GPU card if you have
one

## Implementing Gradient Descent

Manually Computing the Gradients

In [15]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_housing_data_plus_bias = scaler.fit_transform(housing_data_plus_bias)

In [22]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

In [23]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE = 8.860159
Epoch 100 MSE = 4.863471
Epoch 200 MSE = 4.8188615
Epoch 300 MSE = 4.8144155
Epoch 400 MSE = 4.8115587
Epoch 500 MSE = 4.809461
Epoch 600 MSE = 4.807912
Epoch 700 MSE = 4.8067646
Epoch 800 MSE = 4.805911
Epoch 900 MSE = 4.805276


### Using autodiff

In [26]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

In [27]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE = 7.376398
Epoch 100 MSE = 5.1811786
Epoch 200 MSE = 5.0411267
Epoch 300 MSE = 4.973048
Epoch 400 MSE = 4.9256163
Epoch 500 MSE = 4.891511
Epoch 600 MSE = 4.866918
Epoch 700 MSE = 4.849178
Epoch 800 MSE = 4.836382
Epoch 900 MSE = 4.8271513


## Using an Optimizer

In [28]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

In [29]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE = 9.338052
Epoch 100 MSE = 5.050057
Epoch 200 MSE = 4.9306183
Epoch 300 MSE = 4.893297
Epoch 400 MSE = 4.868105
Epoch 500 MSE = 4.8500276
Epoch 600 MSE = 4.8369937
Epoch 700 MSE = 4.8275933
Epoch 800 MSE = 4.8208113
Epoch 900 MSE = 4.81592


## Using an MomentumOptimizer

In [30]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=0.9)

training_op = optimizer.minimize(mse)

In [31]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch 0 MSE = 9.40516
Epoch 100 MSE = 4.8087015
Epoch 200 MSE = 4.8034024
Epoch 300 MSE = 4.8032637
Epoch 400 MSE = 4.803255
Epoch 500 MSE = 4.8032537
Epoch 600 MSE = 4.803254
Epoch 700 MSE = 4.8032546
Epoch 800 MSE = 4.803254
Epoch 900 MSE = 4.8032537
