# Hands On ML Chapter 9 - Up and Running with TensorFlow

In [1]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

## First Graph and First Session

In [2]:
# Construction only: these lines add two variables and the expression f to the
# graph; f is not evaluated until it is run in a session.
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2

In [3]:
# Manage the session by hand: initialise both variables, then evaluate f.
sess = tf.Session()
try:
    sess.run(x.initializer)
    sess.run(y.initializer)
    result = sess.run(f)
    print(result)
finally:
    # Close the session even if one of the runs above raises,
    # so its resources are not leaked.
    sess.close()

42


In [4]:
# Inside the with block the session is the default one, so the initializers
# and f are run without naming sess; the session is closed when the block exits.
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
result

42

In [5]:
# A single node that initialises all variables, used instead of running
# each variable's initializer separately.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    result = f.eval()

result

42

An InteractiveSession automatically sets itself as the default session, so you don't need a `with` block, but you must close the session manually.

In [6]:
# An InteractiveSession is used without a with block, so it has to be
# closed explicitly.
sess = tf.InteractiveSession()
try:
    init.run()
    result = f.eval()
    print(result)
finally:
    # Close the session even if initialisation or evaluation raises.
    sess.close()

42


A TensorFlow program is split into two parts: the first is called the construction phase and the second the execution phase. The construction phase builds the computation graph; the execution phase runs it.

## Managing Graphs

Any node you create is automatically added to the default graph.

In [7]:
x1 = tf.Variable(1)
# Check that the newly created variable belongs to the default graph.
x1.graph is tf.get_default_graph()

True

Sometimes you want to have multiple independent graphs. You can do this by creating a new Graph and temporarily making it the default graph.

In [8]:
graph = tf.Graph()
# `graph` is the default graph only inside this block, so x2 is created in it.
with graph.as_default():
    x2 = tf.Variable(2)

# Outside the block: x2 still belongs to `graph`, which is no longer the default.
x2.graph is graph, x2.graph is tf.get_default_graph()

(True, False)

In Jupyter, when you experiment and re-run cells, you can end up with many duplicate nodes in the default graph. Call tf.reset_default_graph() to clear it.

## Lifecycle of Node Value

In [9]:
# Note: x and y are rebound here to new tensors built from the constant w.
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    # y and z are evaluated by two separate calls.
    print(y.eval())
    print(z.eval())

10
15


If you don't want to evaluate w and x twice, as is done above, you should evaluate y and z in a single graph run like this:

In [10]:
with tf.Session() as sess:
    # Fetch y and z together with one sess.run call.
    y_val, z_val = sess.run([y,z])
    print(y_val)
    print(z_val)

10
15


## Linear Regression with TensorFlow

In [11]:
# Load the California housing data; m and n are the two dimensions of housing.data.
housing = fetch_california_housing()
m, n = housing.data.shape
# Prepend a column of ones to the feature matrix (the bias input).
housing_data_plus_bias = np.hstack([np.ones((m, 1)), housing.data])

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)

# Normal equation: theta = (X^T X)^-1 X^T y, with the inverse factor named.
XT_X_inverse = tf.matrix_inverse(tf.matmul(XT, X))
theta = tf.matmul(tf.matmul(XT_X_inverse, XT), y)

with tf.Session() as sess:
    theta_value = sess.run(theta)

print(theta_value)

[[ -3.74651413e+01]
 [  4.35734153e-01]
 [  9.33829229e-03]
 [ -1.06622010e-01]
 [  6.44106984e-01]
 [ -4.25131839e-06]
 [ -3.77322501e-03]
 [ -4.26648885e-01]
 [ -4.40514028e-01]]


## Implementing Gradient Descent

### Manually Computing the Gradients

In [12]:
# Rescale the raw features with StandardScaler, then prepend the column of
# ones for the bias term to the scaled matrix.
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.hstack([np.ones((m, 1)), scaled_housing_data])

In [13]:
n_epochs = 1000
learning_rate = 0.01

# Graph inputs: the scaled features (with bias column) and the targets as a column.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
# NOTE(review): no seed is passed to random_uniform, so theta's starting values
# (and therefore the printed MSE values) differ from run to run.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Gradient of the MSE with respect to theta, written out by hand.
gradients = 2 / m * tf.matmul(tf.transpose(X), error)
# One gradient descent step: theta <- theta - learning_rate * gradients.
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        if not epoch % 100:
            # Report the loss every 100 epochs, before this epoch's update.
            print("Epoch", epoch, "MSE =", sess.run(mse))
        sess.run(training_op)
    best_theta = sess.run(theta)

Epoch 0 MSE = 9.95054
Epoch 100 MSE = 0.736909
Epoch 200 MSE = 0.559107
Epoch 300 MSE = 0.547524
Epoch 400 MSE = 0.541734
Epoch 500 MSE = 0.537524
Epoch 600 MSE = 0.534385
Epoch 700 MSE = 0.53203
Epoch 800 MSE = 0.530257
Epoch 900 MSE = 0.528916


### Using autodiff

In [14]:
n_epochs = 1000
learning_rate = 0.01

# Graph inputs: the scaled features (with bias column) and the targets as a column.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
# NOTE(review): no seed is passed to random_uniform, so theta's starting values
# (and therefore the printed MSE values) differ from run to run.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# The only change from the manual version: tf.gradients derives the gradient
# of mse with respect to theta.
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        if not epoch % 100:
            # Report the loss every 100 epochs, before this epoch's update.
            print("Epoch", epoch, "MSE =", sess.run(mse))
        sess.run(training_op)
    best_theta = sess.run(theta)

Epoch 0 MSE = 10.8111
Epoch 100 MSE = 1.0067
Epoch 200 MSE = 0.741127
Epoch 300 MSE = 0.679636
Epoch 400 MSE = 0.638949
Epoch 500 MSE = 0.609343
Epoch 600 MSE = 0.587609
Epoch 700 MSE = 0.571605
Epoch 800 MSE = 0.55979
Epoch 900 MSE = 0.551039


### Using an Optimizer

In [15]:
n_epochs = 1000
learning_rate = 0.01

# Graph inputs: the scaled features (with bias column) and the targets as a column.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
# NOTE(review): no seed is passed to random_uniform, so theta's starting values
# (and therefore the printed MSE values) differ from run to run.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# The only change from the previous versions: the optimizer builds the
# training op, replacing the explicit gradient and assign nodes.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        if not epoch % 100:
            # Report the loss every 100 epochs, before this epoch's update.
            print("Epoch", epoch, "MSE =", sess.run(mse))
        sess.run(training_op)
    best_theta = sess.run(theta)

Epoch 0 MSE = 10.8187
Epoch 100 MSE = 0.88508
Epoch 200 MSE = 0.71827
Epoch 300 MSE = 0.669721
Epoch 400 MSE = 0.634774
Epoch 500 MSE = 0.608653
Epoch 600 MSE = 0.589035
Epoch 700 MSE = 0.574236
Epoch 800 MSE = 0.563022
Epoch 900 MSE = 0.554484


## Feeding Data to the Training Algorithm

A placeholder is a node used to pass the training data to TensorFlow during training.

In [16]:
# A has three columns and an unspecified number of rows; B adds 5 to every entry.
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    # Evaluate B twice, feeding A a one-row and then a two-row value.
    B_val_1 = sess.run(B, feed_dict={A: [[1, 2, 3]]})
    B_val_2 = sess.run(B, feed_dict={A: [[4, 5, 6], [7, 8, 9]]})

B_val_1, B_val_2

(array([[ 6.,  7.,  8.]], dtype=float32), array([[  9.,  10.,  11.],
        [ 12.,  13.,  14.]], dtype=float32))

### Mini-batch Gradient Descent

In [30]:
n_epochs = 1000
learning_rate = 0.01
# NOTE(review): this rebinds n (earlier unpacked from housing.data.shape) to 1,
# so earlier cells that build theta from n behave differently if re-run after this one.
n = 1

# X and y are placeholders here, so every training step must feed a batch for them.
X = tf.placeholder(tf.float32, shape=(None,n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name='predictions')
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate) ##difference
training_op = optimizer.minimize(mse) ## difference

init = tf.global_variables_initializer()

batch_size = 100
# Batches per epoch, rounded up; m is still the row count of the housing data set above.
n_batches = int(np.ceil(m / batch_size))

def fetch_batch(epoch, batch_index, batch_size):
    """Return one mini-batch as an (X_batch, y_batch) pair.

    Stub implementation: the three arguments are ignored and the same
    single-sample batch is returned on every call. A real version would
    load the requested batch from disk here.
    """
    return [[4, 6]], [[1]]

with tf.Session() as sess:
    init.run()

    # Run one optimizer step per batch, n_batches batches per epoch.
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            batch_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=batch_feed)
    best_theta = sess.run(theta)
    print(best_theta)

[[ 0.49853241]
 [-0.16568828]]


## Saving and Restoring Models

After training your model, you should save its parameters to disk for future use. You can also save checkpoints during training, so that if your computer crashes you can resume from the last checkpoint rather than from the beginning.

Saving is very easy with TensorFlow. You need to add a Saver node after all your Variables.

In [37]:
# Three variables and one fed placeholder make up the expression f.
x1 = tf.Variable(2, name='x1', dtype='float32')
x2 = tf.Variable([2, 3, 4], name='x2', dtype='float32')
x3 = tf.Variable(1, name='x3', dtype='float32')
x4 = tf.placeholder(tf.float32, shape=(None, 1), name="x4")
init = tf.global_variables_initializer()

f = x1 * x2 + x3 * x4

# The Saver is created after the variables above have been defined.
saver = tf.train.Saver()

with tf.Session() as sess:
    init.run()

    for i in range(100):
        if i % 10:
            continue
        # Every tenth step: print f for the current i and overwrite the checkpoint.
        print(sess.run(f, feed_dict={x4: [[i]]}))
        save_path = saver.save(sess, "tmp/test_saver.ckpt")
    # Write a separate final checkpoint once the loop is done.
    save_path = saver.save(sess, "tmp/test_saver_final.ckpt")

[[ 4.  6.  8.]]
[[ 14.  16.  18.]]
[[ 24.  26.  28.]]
[[ 34.  36.  38.]]
[[ 44.  46.  48.]]
[[ 54.  56.  58.]]
[[ 64.  66.  68.]]
[[ 74.  76.  78.]]
[[ 84.  86.  88.]]
[[ 94.  96.  98.]]


Restoring:

In [39]:
with tf.Session() as sess:
    # Restore variable values from the checkpoint instead of running init.
    saver.restore(sess,"tmp/test_saver.ckpt")

INFO:tensorflow:Restoring parameters from tmp/test_saver.ckpt


You can decide which variables you want to save:

In [41]:
# Saver limited to one variable: only theta is handled, under the name "weights".
saver = tf.train.Saver({"weights": theta}) # this rebinds saver, replacing the Saver created earlier

By default, the save() method also saves the structure of the graph in a .meta file. You can load this graph and add it to the default graph.

In [42]:
# Load the saved graph structure from the .meta file; the call returns a Saver
# that is then used to restore the variable values from the checkpoint.
saver = tf.train.import_meta_graph("tmp/test_saver.ckpt.meta")
with tf.Session() as sess:
    saver.restore(sess, "tmp/test_saver.ckpt")

INFO:tensorflow:Restoring parameters from tmp/test_saver.ckpt
