# Hands-On Machine Learning with Scikit-Learn & TensorFlow

## Chapter 9 Up and Running with TensorFlow

In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
import random
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

%matplotlib inline

### Create Graph to be evaluated

In [2]:
# Build a small computation graph; these lines only declare nodes.
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
# f is the graph node for x*x*y + y + 2 (42 once x=3 and y=4 are initialized).
f = x*x*y + y + 2

### Open a TensorFlow session to evaluate the Graph 

In [3]:
# Evaluate f inside a context-managed session so the session is always
# closed, even if initialization or evaluation raises.
with tf.Session() as sess:
    # Each variable must be initialized explicitly before f can read it.
    sess.run(x.initializer)
    sess.run(y.initializer)
    result = sess.run(f)
    print(result)

42


### Interactive Session that automatically sets itself as the default session

In [4]:
# The InteractiveSession becomes the default session, so init.run() and
# f.eval() below are called without passing a session explicitly.
sess = tf.InteractiveSession()
init = tf.global_variables_initializer()  # one op that initializes x and y together
init.run()
result = f.eval()
print(result)
# No `with` block is used here, so the session is closed by hand.
sess.close()

42


In [5]:
# Instantiate a new Graph object; the cell output is just its repr.
tf.Graph()

<tensorflow.python.framework.ops.Graph at 0x107d01828>

In [6]:
# Display the current default graph (a different object from the one created above).
tf.get_default_graph()

<tensorflow.python.framework.ops.Graph at 0x115194c50>

### Reset default graph to avoid duplicate nodes when experimenting

In [7]:
# Start from an empty default graph so re-running cells does not pile up duplicate nodes.
tf.reset_default_graph()

In [8]:
# y and z both depend on x, which depends on w.
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    # Each eval() is a separate graph run, so the shared nodes w and x are
    # computed twice: once for y and once again for z.
    print(y.eval())  # evaluates w, then x, then y
    print(z.eval())  # evaluates w, then x, then z

10
15


### Make TensorFlow evaluate both y and z in one graph run

In [9]:
# Fetch y and z together so both come out of a single graph run.
with tf.Session() as sess:
    y_val, z_val = sess.run(fetches=[y, z])
    print(y_val, z_val, sep="\n")

10
15


### Linear Regression using the Normal Equation to evaluate theta

In [2]:
import numpy as np
from sklearn.datasets import fetch_california_housing

# Load the California housing data and prepend a column of ones (the bias input).
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.concatenate([np.ones((m, 1)), housing.data], axis=1)

In [12]:
# Normal equation, theta = (X^T X)^-1 X^T y, expressed as TensorFlow ops.
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
# reshape(-1,1) turns the 1-D target array into a column vector.
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

# theta involves no variables, so no initializer is run before evaluating it.
with tf.Session() as sess:
    theta_value = theta.eval()
theta_value

array([[ -3.74651413e+01],
       [  4.35734153e-01],
       [  9.33829229e-03],
       [ -1.06622010e-01],
       [  6.44106984e-01],
       [ -4.25131839e-06],
       [ -3.77322501e-03],
       [ -4.26648885e-01],
       [ -4.40514028e-01]], dtype=float32)

### Manually Computing the Gradients

In [14]:
# Drop the normal-equation nodes before building the gradient-descent graph.
tf.reset_default_graph()

In [16]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

# Standardize the features, then prepend the bias column of ones.
housing = fetch_california_housing()
m, n = housing.data.shape
ss = StandardScaler()
ss_housing = ss.fit(housing.data).transform(housing.data)
scaled_housing_data_plus_bias = np.concatenate([np.ones((m, 1)), ss_housing], axis=1)

In [18]:
# Batch gradient descent on the scaled housing data with a hand-derived gradient.
n_epochs = 1000
learning_rate = 0.01 

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
# theta: (n+1, 1) column of weights, randomly initialized between -1.0 and 1.0.
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Gradient of the MSE with respect to theta: (2/m) * X^T (X theta - y).
gradients = 2/m*tf.matmul(tf.transpose(X), error)
# One descent step: theta <- theta - learning_rate * gradients.
training_op = tf.assign(theta, theta-learning_rate*gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch%100 == 0:
            # Printed before this epoch's update is applied.
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
best_theta

Epoch 0 MSE = 10.3409
Epoch 100 MSE = 0.729504
Epoch 200 MSE = 0.573306
Epoch 300 MSE = 0.557988
Epoch 400 MSE = 0.548578
Epoch 500 MSE = 0.541818
Epoch 600 MSE = 0.536942
Epoch 700 MSE = 0.533426
Epoch 800 MSE = 0.530889
Epoch 900 MSE = 0.529058


array([[  2.06855249e+00],
       [  7.96586573e-01],
       [  1.35625824e-01],
       [ -1.59971863e-01],
       [  1.99437290e-01],
       [  2.06436077e-03],
       [ -4.00493890e-02],
       [ -8.07897389e-01],
       [ -7.72620857e-01]], dtype=float32)

### Using autodiff

In [19]:
# Clear the manual-gradient graph before rebuilding it with autodiff.
tf.reset_default_graph()

In [20]:
# Same batch gradient descent as before, but the gradient comes from tf.gradients.
n_epochs = 1000
learning_rate = 0.01 

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# tf.gradients returns one gradient per listed variable; [0] selects the one
# for theta. It stands in for the hand-written 2/m*tf.matmul(tf.transpose(X), error).
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta-learning_rate*gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch%100 == 0:
            # Printed before this epoch's update is applied.
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
best_theta

Epoch 0 MSE = 14.9569
Epoch 100 MSE = 0.902462
Epoch 200 MSE = 0.652097
Epoch 300 MSE = 0.619159
Epoch 400 MSE = 0.597214
Epoch 500 MSE = 0.580691
Epoch 600 MSE = 0.568143
Epoch 700 MSE = 0.558565
Epoch 800 MSE = 0.551217
Epoch 900 MSE = 0.54555


array([[ 2.06855249],
       [ 0.90658039],
       [ 0.16214627],
       [-0.35675687],
       [ 0.35731012],
       [ 0.01028404],
       [-0.044638  ],
       [-0.51127869],
       [-0.48827058]], dtype=float32)

### Using an Optimizer

In [21]:
# Clear the autodiff graph before rebuilding it with an optimizer.
tf.reset_default_graph()

In [22]:
# Same batch gradient descent, with the update op built by a TensorFlow optimizer.
n_epochs = 1000
learning_rate = 0.01 

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# minimize(mse) replaces the explicit tf.gradients(...) / tf.assign(theta, ...)
# pair used in the previous version of this cell.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch%100 == 0:
            # Printed before this epoch's update is applied.
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()
best_theta

Epoch 0 MSE = 6.81987
Epoch 100 MSE = 0.81702
Epoch 200 MSE = 0.675491
Epoch 300 MSE = 0.634579
Epoch 400 MSE = 0.605852
Epoch 500 MSE = 0.584823
Epoch 600 MSE = 0.569374
Epoch 700 MSE = 0.557995
Epoch 800 MSE = 0.549591
Epoch 900 MSE = 0.543365


array([[ 2.06855249],
       [ 0.85308784],
       [ 0.16113423],
       [-0.23882641],
       [ 0.25237545],
       [ 0.01061077],
       [-0.04341198],
       [-0.56889474],
       [-0.53884482]], dtype=float32)

### Mini-batch Gradient Descent

In [4]:
# Cache for the scaled design matrix ("X") and the column-vector targets ("y").
# Filled on first use so the dataset is loaded and standardized only once
# instead of on every mini-batch request.
_HOUSING_CACHE = {}


def _load_scaled_housing():
    """Return (X, y) for the California housing data, loading it at most once.

    X is the standardized feature matrix with a leading column of ones;
    y is the target reshaped to a column vector.
    """
    if not _HOUSING_CACHE:
        housing = fetch_california_housing()
        m = housing.data.shape[0]
        scaled = StandardScaler().fit_transform(housing.data)
        _HOUSING_CACHE["X"] = np.c_[np.ones((m, 1)), scaled]
        _HOUSING_CACHE["y"] = housing.target.reshape(-1, 1)
    return _HOUSING_CACHE["X"], _HOUSING_CACHE["y"]


def fetch_batch(epoch=None, batch_index=None, batch_size=None):
    """Draw a random mini-batch of rows (without replacement) from the scaled housing data.

    Args:
        epoch: Non-negative epoch number used to seed the draw. When None,
            the draw is not seeded.
        batch_index: Non-negative index of the batch within the epoch. When
            given together with ``epoch`` it is mixed into the seed so that
            each batch of an epoch is different yet reproducible.
        batch_size: Number of rows to draw.

    Returns:
        (X_batch, y_batch): the selected rows of the bias-augmented, scaled
        feature matrix and the matching rows of the target column.
    """
    X_all, y_all = _load_scaled_housing()

    # The sampling below is done by NumPy, so it is NumPy's generator that has
    # to be seeded (seeding Python's `random` module has no effect on it).
    # A private RandomState also leaves the global NumPy RNG state untouched.
    if epoch is None:
        seed = None
    elif batch_index is None:
        seed = [epoch]
    else:
        seed = [epoch, batch_index]
    rng = np.random.RandomState(seed)

    # Randomly choose a subset (size=batch_size) of the row indices.
    ids = rng.choice(X_all.shape[0], batch_size, replace=False)
    return X_all[ids], y_all[ids]

In [118]:
# Mini-batch gradient descent: X and y are placeholders fed with a fresh
# batch from fetch_batch() at every training step.
n_epochs = 1000
learning_rate = 0.01 
batch_size = 1000
# Hard-coded batch count; int(np.ceil(m/batch_size)) would be the count
# derived from the dataset size.
n_batches = 10

tf.reset_default_graph()
# None in the first dimension lets any number of rows be fed.
X = tf.placeholder(tf.float32, shape=(None, n+1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)            
            sess.run(training_op, feed_dict={X: X_batch, y:y_batch})
        if epoch%100 == 0:
            # The reported MSE is measured on the epoch's last mini-batch only,
            # not on the whole dataset.
            print("Epoch", epoch, "MSE =", mse.eval(feed_dict={X: X_batch, y: y_batch}))
    best_theta = theta.eval()
best_theta

Epoch 0 MSE = 2.87678
Epoch 100 MSE = 0.538309
Epoch 200 MSE = 0.515453
Epoch 300 MSE = 0.491285
Epoch 400 MSE = 0.551615
Epoch 500 MSE = 0.564801
Epoch 600 MSE = 0.545008
Epoch 700 MSE = 0.493773
Epoch 800 MSE = 0.530625
Epoch 900 MSE = 0.450043


array([[ 2.06964231],
       [ 0.83144158],
       [ 0.11716183],
       [-0.25468799],
       [ 0.31521973],
       [-0.00509178],
       [-0.03455453],
       [-0.90202737],
       [-0.87350297]], dtype=float32)

### Saving and Restoring Models

In [104]:
# Clear the previous graph before building the model that will be checkpointed.
tf.reset_default_graph()

In [105]:
# Batch gradient descent with checkpoints written during and after training.
n_epochs = 1000
learning_rate = 0.01 

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
# Created after the variables have been defined.
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch%100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
            # Periodic checkpoint; every save reuses the same path, so the
            # file ends up holding the state from epoch 900.
            save_path = saver.save(sess, "./tmp/model.ckpt")
        sess.run(training_op)
    best_theta = theta.eval()
    # Final parameters go to a separate checkpoint file.
    save_path = saver.save(sess, "./tmp/model_final.ckpt")

best_theta

Epoch 0 MSE = 5.09407
Epoch 100 MSE = 0.659464
Epoch 200 MSE = 0.577468
Epoch 300 MSE = 0.563582
Epoch 400 MSE = 0.554667
Epoch 500 MSE = 0.547963
Epoch 600 MSE = 0.542839
Epoch 700 MSE = 0.5389
Epoch 800 MSE = 0.535854
Epoch 900 MSE = 0.533488


array([[ 2.06855249],
       [ 0.88501078],
       [ 0.14685963],
       [-0.33703393],
       [ 0.35024965],
       [ 0.00500379],
       [-0.04288518],
       [-0.64297843],
       [-0.61847723]], dtype=float32)

### Restore a model

In [112]:
# Rebuild the graph from scratch; the variable values will come from the checkpoint.
tf.reset_default_graph()

In [113]:
# Rebuild the same graph, load theta from a checkpoint, and continue training.
n_epochs = 1000
learning_rate = 0.01 

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()  # built but deliberately not run below
saver = tf.train.Saver()

with tf.Session() as sess:
    # restore() takes the place of sess.run(init). It loads the periodic
    # checkpoint (model.ckpt), not model_final.ckpt, which is why the first
    # printed MSE matches the saving run's epoch-900 value.
    saver.restore(sess, "./tmp/model.ckpt")
    for epoch in range(n_epochs):
        if epoch%100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

best_theta

Epoch 0 MSE = 0.533488
Epoch 100 MSE = 0.53164
Epoch 200 MSE = 0.530188
Epoch 300 MSE = 0.529044
Epoch 400 MSE = 0.528137
Epoch 500 MSE = 0.527413
Epoch 600 MSE = 0.526835
Epoch 700 MSE = 0.526371
Epoch 800 MSE = 0.525996
Epoch 900 MSE = 0.525694


array([[  2.06855249e+00],
       [  8.59643519e-01],
       [  1.28414184e-01],
       [ -3.14709216e-01],
       [  3.43048215e-01],
       [ -1.37762446e-03],
       [ -4.07838225e-02],
       [ -8.01254332e-01],
       [ -7.75039196e-01]], dtype=float32)

### Visualizing the Graph and Training Curves using TensorBoard

In [126]:
from datetime import datetime

# Mini-batch training that also logs the graph and an MSE curve for TensorBoard.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
# A timestamped sub-directory keeps each run's event files separate.
logdir = "{}/run-{}/".format(root_logdir, now)
n_epochs = 1000
learning_rate = 0.01 
batch_size = 1000
# Hard-coded batch count; int(np.ceil(m/batch_size)) would be the count
# derived from the dataset size.
n_batches = 10

tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, n+1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
# Summary node that records the MSE value under the tag 'MSE'.
mse_summary = tf.summary.scalar('MSE', mse)
# The writer is handed the default graph along with the log directory.
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)                        
            # With n_batches = 10 this is true only for batch_index 0, so one
            # summary point is written per epoch.
            if batch_index%10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y:y_batch})
                # Global training-step counter used as the x-axis value.
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y:y_batch})
        if epoch%100 == 0:
            # The reported MSE is measured on the epoch's last mini-batch only.
            print("Epoch", epoch, "MSE =", mse.eval(feed_dict={X: X_batch, y: y_batch}))
    best_theta = theta.eval()

file_writer.close()    
best_theta

Epoch 0 MSE = 6.65287
Epoch 100 MSE = 0.489574
Epoch 200 MSE = 0.536631
Epoch 300 MSE = 0.563732
Epoch 400 MSE = 0.531661
Epoch 500 MSE = 0.561771
Epoch 600 MSE = 0.530243
Epoch 700 MSE = 0.515977
Epoch 800 MSE = 0.570246
Epoch 900 MSE = 0.478018


array([[  2.06452680e+00],
       [  8.25280368e-01],
       [  1.19496971e-01],
       [ -2.83477902e-01],
       [  2.89164603e-01],
       [  2.97480728e-05],
       [ -3.97667922e-02],
       [ -8.98564637e-01],
       [ -8.69776428e-01]], dtype=float32)

Run 1:<br>
Epoch 0 MSE = 6.94011<br>
Epoch 100 MSE = 0.48316<br>
Epoch 200 MSE = 0.519101<br>
Epoch 300 MSE = 0.487074<br>
Epoch 400 MSE = 0.488232<br>
Epoch 500 MSE = 0.522554<br>
Epoch 600 MSE = 0.473014<br>
Epoch 700 MSE = 0.514996<br>
Epoch 800 MSE = 0.489359<br>
Epoch 900 MSE = 0.603913<br>

To open TensorBoard, run the following command in a terminal:

$ tensorboard --logdir tf_logs/

### Modularity 

In [144]:
from datetime import datetime

# Log directory for this run: <root_logdir>/run-<UTC timestamp>/
root_logdir = "tf_modules"
now = format(datetime.utcnow(), "%Y%m%d%H%M%S")
logdir = "{}/run-{}/".format(root_logdir, now)

In [145]:
# Fetch one 100-row batch to feed to the example graphs below.
X_batch, y_batch = fetch_batch(epoch=0, batch_index=0, batch_size=100)

In [149]:
tf.reset_default_graph()
def relu(X):
    """Build one ReLU unit on X with its own weights and bias.

    Args:
        X: 2-D tensor whose second dimension is statically known.

    Returns:
        The tensor max(X.w + b, 0), with one column.
    """
    w_shape = (int(X.shape[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name="weights")
    b = tf.Variable(0.0, name="bias")
    z = tf.add(tf.matmul(X,w), b, name="z")
    return tf.maximum(z, 0., name="relu")

X = tf.placeholder(tf.float32, shape=(None, n+1), name="X")
# Five independent units built by the same function, summed element-wise.
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")
# `output` holds one value per fed row, but a scalar summary accepts only a
# single value, so the batch mean is what gets summarized.
output_summary = tf.summary.scalar('output', tf.reduce_mean(output))
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    sess.run(output, feed_dict={X: X_batch})
    # Record the summary alongside the graph in the same event file.
    summary_str = output_summary.eval(feed_dict={X: X_batch})
    file_writer.add_summary(summary_str)

file_writer.close()

### Name Scopes

In [5]:
from datetime import datetime

# Log directory for the name-scope example: <root_logdir>/run-<UTC timestamp>/
root_logdir = "tf_name_scopes"
now = datetime.strftime(datetime.utcnow(), "%Y%m%d%H%M%S")
logdir = "{}/run-{}/".format(root_logdir, now)

In [159]:
tf.reset_default_graph()
def relu(X):
    """Build one ReLU unit on X, creating all of its nodes inside a "relu" name scope."""
    with tf.name_scope("relu"):
        w_shape = (int(X.shape[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X,w), b, name="z")
        maximum = tf.maximum(z, 0., name="max")
        return maximum

X = tf.placeholder(tf.float32, shape=(None, n+1), name="X")
# Five independent units built by the same function, summed element-wise.
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")
# The writer is handed the default graph along with the log directory.
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # X_batch is the batch fetched in an earlier cell.
    sess.run(output, feed_dict={X: X_batch})
    
file_writer.close()    

### Sharing Variables

In [12]:
from datetime import datetime

# Log directory for the first variable-sharing example: <root_logdir>/run-<UTC timestamp>/
root_logdir = "tf_sv1"
now = format(datetime.utcnow(), "%Y%m%d%H%M%S")
logdir = "{}/run-{}/".format(root_logdir, now)

In [13]:
# Fetch one 100-row batch to feed to the variable-sharing graphs below.
X_batch, y_batch = fetch_batch(epoch=0, batch_index=0, batch_size=100)

In [14]:
tf.reset_default_graph()
def relu(X):
    """Build one unit on X that clips at the shared "threshold" variable.

    The threshold must already exist in the "relu" variable scope; the
    weights and bias are created anew on every call.
    """
    # reuse=True: get_variable looks up the existing threshold rather than creating one.
    with tf.variable_scope("relu", reuse=True):
        threshold = tf.get_variable("threshold")
        with tf.name_scope("relu"):
            w_shape = (int(X.shape[1]), 1)
            w = tf.Variable(tf.random_normal(w_shape), name="weights")
            b = tf.Variable(0.0, name="bias")
            z = tf.add(tf.matmul(X,w), b, name="z")
        return tf.maximum(z, threshold, name="max")

X = tf.placeholder(tf.float32, shape=(None, n+1), name="X")
# Create the shared threshold once (initialized to 0.0), before any relu(X) call looks it up.
with tf.variable_scope("relu"): 
    threshold = tf.get_variable("threshold", shape=(),initializer=tf.constant_initializer(0.0))
relus = [relu(X) for relu_index in range(5)]
output = tf.add_n(relus, name="output")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    sess.run(output, feed_dict={X: X_batch})
    
file_writer.close()    

Another way of sharing:

In [15]:
# Log directory for the second variable-sharing example: <root_logdir>/run-<UTC timestamp>/
root_logdir = "tf_sv2"
now = datetime.strftime(datetime.utcnow(), "%Y%m%d%H%M%S")
logdir = "{}/run-{}/".format(root_logdir, now)

In [16]:
tf.reset_default_graph()
def relu(X):
    """Build one unit on X that clips at a "threshold" variable obtained via get_variable.

    Whether the threshold is created or reused depends on the variable scope
    the caller has opened around this call.
    """
    threshold = tf.get_variable("threshold", shape=(),initializer=tf.constant_initializer(0.0))
    with tf.name_scope("relu"):
        w_shape = (int(X.shape[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X,w), b, name="z")
    return tf.maximum(z, threshold, name="max")

X = tf.placeholder(tf.float32, shape=(None, n+1), name="X")
relus = []
for relu_index in range(5):    
    # reuse is False for the first unit (which creates the threshold) and
    # True for the remaining four (which share it).
    with tf.variable_scope("relu", reuse=(relu_index >= 1)) as scope:
        relus.append(relu(X))
output = tf.add_n(relus, name="output")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    sess.run(output, feed_dict={X: X_batch})
    
file_writer.close()    