In [1]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_california_housing

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [2]:
housing = fetch_california_housing()
m,n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [61]:
'''
Compute Graph
'''
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

In [71]:
with tf.Session() as sess:
    theta_value = theta.eval()

print theta_value

[[ -3.74651413e+01]
 [  4.35734153e-01]
 [  9.33829229e-03]
 [ -1.06622010e-01]
 [  6.44106984e-01]
 [ -4.25131839e-06]
 [ -3.77322501e-03]
 [ -4.26648885e-01]
 [ -4.40514028e-01]]


<hr>
## Gradient Descent

In [3]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

In [4]:
reset_graph()

'''
Explicit Gradient Descent
'''
reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")

y_pred = tf.matmul(X, theta, name="predictions")

error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

print best_theta

('Epoch', 0, 'MSE =', 9.1615419)
('Epoch', 100, 'MSE =', 9.1615419)
('Epoch', 200, 'MSE =', 9.1615419)
('Epoch', 300, 'MSE =', 9.1615419)
('Epoch', 400, 'MSE =', 9.1615419)
('Epoch', 500, 'MSE =', 9.1615419)
('Epoch', 600, 'MSE =', 9.161541)
('Epoch', 700, 'MSE =', 9.1615419)
('Epoch', 800, 'MSE =', 9.1615419)
('Epoch', 900, 'MSE =', 9.1615419)
[[-0.1673944 ]
 [-0.46283674]
 [-0.04063368]
 [-0.27085733]
 [ 0.90942287]
 [ 0.88372922]
 [ 0.2296679 ]
 [-0.28315711]
 [ 0.18720484]]


In [58]:
'''
Gradient Descent using AutoDiff
'''

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

n_epochs = 1000
learning_rate = 0.01

scaler.fit(housing_data_plus_bias)
scaled_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")


error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if(epoch % 100 == 0):
            print("Epoch", epoch, "MSE=", mse.eval())
            sess.run(training_op)
        
        best_theta = theta.eval()

print best_theta

('Epoch', 0, 'MSE=', 9.9236212)
('Epoch', 100, 'MSE=', 9.6839266)
('Epoch', 200, 'MSE=', 9.4575777)
('Epoch', 300, 'MSE=', 9.2437639)
('Epoch', 400, 'MSE=', 9.0417309)
('Epoch', 500, 'MSE=', 8.8507671)
('Epoch', 600, 'MSE=', 8.6702118)
('Epoch', 700, 'MSE=', 8.4994469)
('Epoch', 800, 'MSE=', 8.3378925)
('Epoch', 900, 'MSE=', 8.1850033)
[[-0.94450974]
 [-0.63573897]
 [ 0.59233904]
 [ 0.88818699]
 [-0.15050341]
 [-0.50994772]
 [-0.1548674 ]
 [ 0.69803715]
 [ 0.56587505]]


In [59]:
'''
Gradient Descent Using Optimizer
'''

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

n_epochs = 100000
learning_rate = 0.01

scaler.fit(housing_data_plus_bias)
scaled_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")


error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

#optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if(epoch % 100 == 0):
            print("Epoch", epoch, "MSE=", mse.eval())
            sess.run(training_op)
        
        best_theta = theta.eval()

print best_theta

('Epoch', 0, 'MSE=', 6.2278886)
('Epoch', 100, 'MSE=', 6.1584239)
('Epoch', 200, 'MSE=', 6.034677)
('Epoch', 300, 'MSE=', 5.8760309)
('Epoch', 400, 'MSE=', 5.702775)
('Epoch', 500, 'MSE=', 5.5333381)
('Epoch', 600, 'MSE=', 5.382309)
('Epoch', 700, 'MSE=', 5.2593365)
('Epoch', 800, 'MSE=', 5.1689014)
('Epoch', 900, 'MSE=', 5.1107984)
('Epoch', 1000, 'MSE=', 5.0811357)
('Epoch', 1100, 'MSE=', 5.0736012)
('Epoch', 1200, 'MSE=', 5.0807905)
('Epoch', 1300, 'MSE=', 5.095386)
('Epoch', 1400, 'MSE=', 5.1110754)
('Epoch', 1500, 'MSE=', 5.1231346)
('Epoch', 1600, 'MSE=', 5.1286726)
('Epoch', 1700, 'MSE=', 5.1265693)
('Epoch', 1800, 'MSE=', 5.1171885)
('Epoch', 1900, 'MSE=', 5.1019535)
('Epoch', 2000, 'MSE=', 5.0828762)
('Epoch', 2100, 'MSE=', 5.0621095)
('Epoch', 2200, 'MSE=', 5.0415878)
('Epoch', 2300, 'MSE=', 5.0227866)
('Epoch', 2400, 'MSE=', 5.0066061)
('Epoch', 2500, 'MSE=', 4.9933672)
('Epoch', 2600, 'MSE=', 4.9829021)
('Epoch', 2700, 'MSE=', 4.9746943)
('Epoch', 2800, 'MSE=', 4.9680462)
(

('Epoch', 23600, 'MSE=', 4.80336)
('Epoch', 23700, 'MSE=', 4.8033571)
('Epoch', 23800, 'MSE=', 4.8033547)
('Epoch', 23900, 'MSE=', 4.8033519)
('Epoch', 24000, 'MSE=', 4.803349)
('Epoch', 24100, 'MSE=', 4.8033471)
('Epoch', 24200, 'MSE=', 4.8033447)
('Epoch', 24300, 'MSE=', 4.8033423)
('Epoch', 24400, 'MSE=', 4.8033404)
('Epoch', 24500, 'MSE=', 4.8033381)
('Epoch', 24600, 'MSE=', 4.8033357)
('Epoch', 24700, 'MSE=', 4.8033338)
('Epoch', 24800, 'MSE=', 4.8033319)
('Epoch', 24900, 'MSE=', 4.8033299)
('Epoch', 25000, 'MSE=', 4.8033276)
('Epoch', 25100, 'MSE=', 4.8033261)
('Epoch', 25200, 'MSE=', 4.8033237)
('Epoch', 25300, 'MSE=', 4.8033228)
('Epoch', 25400, 'MSE=', 4.8033214)
('Epoch', 25500, 'MSE=', 4.803319)
('Epoch', 25600, 'MSE=', 4.803318)
('Epoch', 25700, 'MSE=', 4.8033161)
('Epoch', 25800, 'MSE=', 4.8033147)
('Epoch', 25900, 'MSE=', 4.8033133)
('Epoch', 26000, 'MSE=', 4.8033123)
('Epoch', 26100, 'MSE=', 4.8033109)
('Epoch', 26200, 'MSE=', 4.8033094)
('Epoch', 26300, 'MSE=', 4.803307

('Epoch', 46600, 'MSE=', 4.8032546)
('Epoch', 46700, 'MSE=', 4.8032546)
('Epoch', 46800, 'MSE=', 4.8032546)
('Epoch', 46900, 'MSE=', 4.8032551)
('Epoch', 47000, 'MSE=', 4.8032546)
('Epoch', 47100, 'MSE=', 4.8032546)
('Epoch', 47200, 'MSE=', 4.8032546)
('Epoch', 47300, 'MSE=', 4.8032546)
('Epoch', 47400, 'MSE=', 4.8032546)
('Epoch', 47500, 'MSE=', 4.8032541)
('Epoch', 47600, 'MSE=', 4.8032546)
('Epoch', 47700, 'MSE=', 4.8032546)
('Epoch', 47800, 'MSE=', 4.8032541)
('Epoch', 47900, 'MSE=', 4.8032546)
('Epoch', 48000, 'MSE=', 4.8032541)
('Epoch', 48100, 'MSE=', 4.8032546)
('Epoch', 48200, 'MSE=', 4.8032541)
('Epoch', 48300, 'MSE=', 4.8032541)
('Epoch', 48400, 'MSE=', 4.8032546)
('Epoch', 48500, 'MSE=', 4.8032541)
('Epoch', 48600, 'MSE=', 4.8032541)
('Epoch', 48700, 'MSE=', 4.8032541)
('Epoch', 48800, 'MSE=', 4.8032546)
('Epoch', 48900, 'MSE=', 4.8032546)
('Epoch', 49000, 'MSE=', 4.8032541)
('Epoch', 49100, 'MSE=', 4.8032541)
('Epoch', 49200, 'MSE=', 4.8032541)
('Epoch', 49300, 'MSE=', 4.8

('Epoch', 69400, 'MSE=', 4.8032537)
('Epoch', 69500, 'MSE=', 4.8032541)
('Epoch', 69600, 'MSE=', 4.8032541)
('Epoch', 69700, 'MSE=', 4.8032541)
('Epoch', 69800, 'MSE=', 4.8032537)
('Epoch', 69900, 'MSE=', 4.8032537)
('Epoch', 70000, 'MSE=', 4.8032537)
('Epoch', 70100, 'MSE=', 4.8032541)
('Epoch', 70200, 'MSE=', 4.8032541)
('Epoch', 70300, 'MSE=', 4.8032541)
('Epoch', 70400, 'MSE=', 4.8032537)
('Epoch', 70500, 'MSE=', 4.8032537)
('Epoch', 70600, 'MSE=', 4.8032541)
('Epoch', 70700, 'MSE=', 4.8032537)
('Epoch', 70800, 'MSE=', 4.8032537)
('Epoch', 70900, 'MSE=', 4.8032537)
('Epoch', 71000, 'MSE=', 4.8032537)
('Epoch', 71100, 'MSE=', 4.8032537)
('Epoch', 71200, 'MSE=', 4.8032537)
('Epoch', 71300, 'MSE=', 4.8032537)
('Epoch', 71400, 'MSE=', 4.8032537)
('Epoch', 71500, 'MSE=', 4.8032537)
('Epoch', 71600, 'MSE=', 4.8032537)
('Epoch', 71700, 'MSE=', 4.8032541)
('Epoch', 71800, 'MSE=', 4.8032537)
('Epoch', 71900, 'MSE=', 4.8032537)
('Epoch', 72000, 'MSE=', 4.8032537)
('Epoch', 72100, 'MSE=', 4.8

('Epoch', 92900, 'MSE=', 4.8032541)
('Epoch', 93000, 'MSE=', 4.8032541)
('Epoch', 93100, 'MSE=', 4.8032541)
('Epoch', 93200, 'MSE=', 4.8032541)
('Epoch', 93300, 'MSE=', 4.8032537)
('Epoch', 93400, 'MSE=', 4.8032541)
('Epoch', 93500, 'MSE=', 4.8032537)
('Epoch', 93600, 'MSE=', 4.8032541)
('Epoch', 93700, 'MSE=', 4.8032541)
('Epoch', 93800, 'MSE=', 4.8032537)
('Epoch', 93900, 'MSE=', 4.8032541)
('Epoch', 94000, 'MSE=', 4.8032541)
('Epoch', 94100, 'MSE=', 4.8032541)
('Epoch', 94200, 'MSE=', 4.8032541)
('Epoch', 94300, 'MSE=', 4.8032541)
('Epoch', 94400, 'MSE=', 4.8032537)
('Epoch', 94500, 'MSE=', 4.8032541)
('Epoch', 94600, 'MSE=', 4.8032541)
('Epoch', 94700, 'MSE=', 4.8032541)
('Epoch', 94800, 'MSE=', 4.8032541)
('Epoch', 94900, 'MSE=', 4.8032537)
('Epoch', 95000, 'MSE=', 4.8032541)
('Epoch', 95100, 'MSE=', 4.8032537)
('Epoch', 95200, 'MSE=', 4.8032537)
('Epoch', 95300, 'MSE=', 4.8032537)
('Epoch', 95400, 'MSE=', 4.8032537)
('Epoch', 95500, 'MSE=', 4.8032541)
('Epoch', 95600, 'MSE=', 4.8

<hr>
## Place holders

In [67]:
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A:[[1,2,3]]})
    B_val_2 = B.eval(feed_dict={A:[[4,5,6],[7,8,9]]})

In [68]:
print B_val_1
print B_val_2

[[ 6.  7.  8.]]
[[  9.  10.  11.]
 [ 12.  13.  14.]]


<hr>
## Mini-batch Gradient Descent

In [69]:
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

def fetch_batch(epoch, batch_index, batch_size):
    index = epoch*batch_index
    X_batch = scaled_housing_data_plus_bias[index:index+batch_size, :]
    Y_batch
    return X_batch, Y_batch

In [70]:
print scaled_housing_data_plus_bias.shape

(20640, 9)
