In [1]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_california_housing

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [2]:
housing = fetch_california_housing()
m,n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [3]:
'''
Compute Graph
'''
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

In [4]:
with tf.Session() as sess:
    theta_value = theta.eval()

print theta_value

[[ -3.74651413e+01]
 [  4.35734153e-01]
 [  9.33829229e-03]
 [ -1.06622010e-01]
 [  6.44106984e-01]
 [ -4.25131839e-06]
 [ -3.77322501e-03]
 [ -4.26648885e-01]
 [ -4.40514028e-01]]


<hr>
## Gradient Descent

In [5]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

In [6]:
reset_graph()

'''
Explicit Gradient Descent
'''
reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")

y_pred = tf.matmul(X, theta, name="predictions")

error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()

print best_theta

('Epoch', 0, 'MSE =', 9.1615419)
('Epoch', 100, 'MSE =', 9.1615419)
('Epoch', 200, 'MSE =', 9.161541)
('Epoch', 300, 'MSE =', 9.1615419)
('Epoch', 400, 'MSE =', 9.1615419)
('Epoch', 500, 'MSE =', 9.1615419)
('Epoch', 600, 'MSE =', 9.1615419)
('Epoch', 700, 'MSE =', 9.1615419)
('Epoch', 800, 'MSE =', 9.1615419)
('Epoch', 900, 'MSE =', 9.161541)
[[-0.1673944 ]
 [-0.46283674]
 [-0.04063368]
 [-0.27085733]
 [ 0.90942287]
 [ 0.88372922]
 [ 0.2296679 ]
 [-0.28315711]
 [ 0.18720484]]


In [7]:
'''
Gradient Descent using AutoDiff
'''

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

n_epochs = 1000
learning_rate = 0.01

scaler.fit(housing_data_plus_bias)
scaled_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")


error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if(epoch % 100 == 0):
            print("Epoch", epoch, "MSE=", mse.eval())
            sess.run(training_op)
        
        best_theta = theta.eval()

print best_theta

('Epoch', 0, 'MSE=', 10.286603)
('Epoch', 100, 'MSE=', 9.9733601)
('Epoch', 200, 'MSE=', 9.6803942)
('Epoch', 300, 'MSE=', 9.4063177)
('Epoch', 400, 'MSE=', 9.1498413)
('Epoch', 500, 'MSE=', 8.9097662)
('Epoch', 600, 'MSE=', 8.6849785)
('Epoch', 700, 'MSE=', 8.4744425)
('Epoch', 800, 'MSE=', 8.2771959)
('Epoch', 900, 'MSE=', 8.0923433)
[[-0.21868253]
 [-0.18326624]
 [ 0.81288898]
 [-0.43025935]
 [-0.2653321 ]
 [ 0.40435165]
 [-0.72610581]
 [ 0.6070227 ]
 [ 0.22958989]]


In [9]:
'''
Gradient Descent Using Optimizer
'''

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

n_epochs = 1000
learning_rate = 0.01

scaler.fit(housing_data_plus_bias)
scaled_housing_data_plus_bias = scaler.transform(housing_data_plus_bias)

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")


error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

#optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if(epoch % 100 == 0):
            print("Epoch", epoch, "MSE=", mse.eval())
            sess.run(training_op)
        
        best_theta = theta.eval()

print best_theta

('Epoch', 0, 'MSE=', 7.3321881)
('Epoch', 100, 'MSE=', 7.2273278)
('Epoch', 200, 'MSE=', 7.0397882)
('Epoch', 300, 'MSE=', 6.797636)
('Epoch', 400, 'MSE=', 6.5301552)
('Epoch', 500, 'MSE=', 6.2639127)
('Epoch', 600, 'MSE=', 6.0199814)
('Epoch', 700, 'MSE=', 5.812418)
('Epoch', 800, 'MSE=', 5.6479931)
('Epoch', 900, 'MSE=', 5.5269337)
[[-0.68307185]
 [ 0.65614766]
 [-0.1213624 ]
 [ 0.28206643]
 [-0.50254589]
 [-0.38790596]
 [ 0.11193584]
 [ 0.47083807]
 [ 0.45712954]]


<hr>
## Place holders

In [10]:
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A:[[1,2,3]]})
    B_val_2 = B.eval(feed_dict={A:[[4,5,6],[7,8,9]]})

In [11]:
print B_val_1
print B_val_2

[[ 6.  7.  8.]]
[[  9.  10.  11.]
 [ 12.  13.  14.]]
