In [5]:
import tensorflow as tf

# Build the graph. These statements perform no computation; they only add
# variable and operation nodes to the default graph.
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")

f = x*x*y + y + 2

# Create a session to run the computation. The try/finally guarantees that
# the session is closed even if initialization or evaluation raises.
sess = tf.Session()
try:
    sess.run(x.initializer)
    sess.run(y.initializer)
    result = sess.run(f)
    print(result)
finally:
    sess.close()

42


In [6]:
# Alternatively, open the session in a `with` statement. Inside the block
# `sess` is set as the default session, so calling .run() / .eval() on a
# node is equivalent to calling tf.get_default_session().run(node).
# eval() is called on a node to evaluate its value.

with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
    print(result)

# The session is closed automatically at the end of the with block

42


In [7]:
# tf.global_variables_initializer() creates a single node that, when run,
# initializes all variables in one pass (instead of running each
# variable's initializer separately).

init = tf.global_variables_initializer() # prepare an init node

with tf.Session() as sess:
    init.run()
    print(f.eval())

42


In [8]:
# InteractiveSession automatically sets itself as the default session, so
# no `with` block is needed -- but it must be closed manually. The
# try/finally makes sure that happens even if a run fails; otherwise the
# leaked session would remain the default session for later cells.

sess = tf.InteractiveSession()
try:
    init.run()
    result = f.eval()
    print(result)
finally:
    sess.close()

42


In [9]:
# 9.1 Managing Graphs

# Any node you create is automatically added to the default graph,
# so the variable's graph is the very same object as the default graph.
x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

In [10]:
# To manage several independent graphs, create a new Graph and make it the
# default graph only for the duration of a `with` block.
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)

# x2 was created while `graph` was the default, so it belongs to `graph`
x2.graph is graph


True

In [11]:
# Outside the `with` block the original default graph is active again,
# and x2 is not part of it.
x2.graph is tf.get_default_graph()

False

In [12]:
# Call tf.reset_default_graph() to reset the default graph, removing all
# nodes that were added to the original default graph. This is useful in a
# notebook, where re-running cells would otherwise keep adding duplicate nodes.
tf.reset_default_graph()

In [13]:
# 9.2 Lifecycle of a Node Value
# When you evaluate a node, TensorFlow automatically determines the set of
# nodes it depends on and evaluates those nodes first.
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

# Each eval() call below is a separate graph run: y and z both depend on
# x (and therefore on w), so w and x are evaluated once per call.
with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

10
15


In [14]:
# Node values are dropped between graph runs; only variable values are
# kept by the session across runs.
# A variable starts its life when its initializer is run and ends when the
# session is closed.
# If you don't want to evaluate w and x twice, ask TensorFlow to evaluate
# both y and z in a single graph run:

with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

10
15


In [15]:
# 9.3 Linear Regression with TensorFlow
# TensorFlow operations can take any number of inputs and produce any
# number of outputs.
# Their multi-dimensional array inputs/outputs are called tensors.
# Like NumPy arrays, tensors have a shape and a type.
# In the Python API, evaluated tensor values are returned as NumPy ndarrays.

import numpy as np
from sklearn.datasets import fetch_california_housing

# m = number of training instances, n = number of features
housing = fetch_california_housing()
m, n = housing.data.shape
print(m, n)

20640 8


In [16]:
# Prepend a column of ones (the bias feature x0 = 1) to the feature matrix,
# turning the (m, n) data into an (m, n + 1) array.
housing_data_plus_bias = np.hstack((np.ones((m, 1)), housing.data))
print(housing_data_plus_bias.shape)

(20640, 9)


In [17]:
# Solve linear regression in closed form with the Normal Equation:
#     theta = (X^T X)^-1 X^T y
# The targets are reshaped into a column vector so they can be used in
# matrix products.
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape((-1, 1)), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session() as sess:
    print(theta.eval())

[[-3.7185181e+01]
 [ 4.3633747e-01]
 [ 9.3952334e-03]
 [-1.0711310e-01]
 [ 6.4479220e-01]
 [-4.0338000e-06]
 [-3.7813708e-03]
 [-4.2348403e-01]
 [-4.3721911e-01]]


In [18]:
# TensorFlow will automatically run this on your GPU if you have one
# and have installed the GPU-enabled build of TensorFlow

In [19]:
# 9.4 Implementing Gradient Descent
# Gradients can be derived by hand or computed automatically with
# TensorFlow's autodiff feature.
# Always normalize the input features before running gradient descent,
# otherwise training may be very slow; StandardScaler is a handy tool for this.

# First approach: manually computing the gradients
from sklearn.preprocessing import StandardScaler

housing_data = housing.data

# NOTE: scale only the real features; the bias column of ones is added
# afterwards so that it is not touched by the scaler.
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing_data)
scaled_housing_data_plus_bias = np.hstack((np.ones((m, 1)), scaled_housing_data))
scaled_housing_data_plus_bias

array([[ 1.        ,  2.34476576,  0.98214266, ..., -0.04959654,
         1.05254828, -1.32783522],
       [ 1.        ,  2.33223796, -0.60701891, ..., -0.09251223,
         1.04318455, -1.32284391],
       [ 1.        ,  1.7826994 ,  1.85618152, ..., -0.02584253,
         1.03850269, -1.33282653],
       ...,
       [ 1.        , -1.14259331, -0.92485123, ..., -0.0717345 ,
         1.77823747, -0.8237132 ],
       [ 1.        , -1.05458292, -0.84539315, ..., -0.09122515,
         1.77823747, -0.87362627],
       [ 1.        , -0.78012947, -1.00430931, ..., -0.04368215,
         1.75014627, -0.83369581]])

In [20]:
# Batch gradient descent with hand-derived gradients.
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

# The whole (scaled) training set is baked into the graph as constants.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape((-1, 1)), dtype=tf.float32, name="y")

# theta starts from uniform random values in [-1, 1), one row per feature
# plus the bias.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Gradient of the MSE with respect to theta: (2/m) * X^T (X theta - y)
gradients = 2/m * tf.matmul(tf.transpose(X), error)
# One gradient descent step: theta <- theta - learning_rate * gradients
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # Report the current MSE every 100 epochs, before taking the step
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
    print(best_theta)

Epoch 0 MSE =  8.463472
Epoch 100 MSE =  0.6564594
Epoch 200 MSE =  0.56074
Epoch 300 MSE =  0.5514607
Epoch 400 MSE =  0.5454856
Epoch 500 MSE =  0.5409267
Epoch 600 MSE =  0.53741544
Epoch 700 MSE =  0.5346962
Epoch 800 MSE =  0.53258
Epoch 900 MSE =  0.53092426
[[ 2.0685523 ]
 [ 0.8801184 ]
 [ 0.142255  ]
 [-0.33468568]
 [ 0.35134014]
 [ 0.00339176]
 [-0.04239122]
 [-0.6811341 ]
 [-0.6563971 ]]


In [21]:
# Batch gradient descent using autodiff instead of hand-derived gradients
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape((-1, 1)), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

# tf.gradients() takes an op (in this case mse) and a list of variables
# (in this case just theta) and creates a list of ops, one per variable,
# that compute the gradients of the op with regard to each variable.
# Only one variable is passed, so the single resulting op is taken with [0].
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # Report the current MSE every 100 epochs, before taking the step
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
    print(best_theta)

Epoch 0 MSE =  9.802762
Epoch 100 MSE =  0.7400382
Epoch 200 MSE =  0.5581155
Epoch 300 MSE =  0.5447478
Epoch 400 MSE =  0.5395431
Epoch 500 MSE =  0.53591996
Epoch 600 MSE =  0.5332178
Epoch 700 MSE =  0.53118086
Epoch 800 MSE =  0.5296383
Epoch 900 MSE =  0.5284637
[[ 2.0685525 ]
 [ 0.8575015 ]
 [ 0.13829707]
 [-0.29142457]
 [ 0.3153329 ]
 [ 0.00224446]
 [-0.04157392]
 [-0.73121136]
 [-0.70381624]]


In [22]:
# TensorFlow uses reverse-mode autodiff, which is efficient and accurate
# when there are many inputs and few outputs, as is often the case in
# neural networks (many parameters, a single scalar loss)

In [23]:
# Using an optimizer
# Instead of computing the gradients and building the assign op ourselves,
# let a built-in optimizer create the training op.
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape((-1, 1)), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# minimize(mse) returns the op that performs one optimization step on mse
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # Report the current MSE every 100 epochs, before taking the step
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
    print(best_theta)


Epoch 0 MSE =  8.578274
Epoch 100 MSE =  0.6580695
Epoch 200 MSE =  0.54854965
Epoch 300 MSE =  0.54265535
Epoch 400 MSE =  0.539316
Epoch 500 MSE =  0.53663474
Epoch 600 MSE =  0.53445137
Epoch 700 MSE =  0.5326685
Epoch 800 MSE =  0.531209
Epoch 900 MSE =  0.5300116
[[ 2.0685525e+00]
 [ 8.9665812e-01]
 [ 1.3707907e-01]
 [-3.8141617e-01]
 [ 3.9679003e-01]
 [ 1.2939022e-03]
 [-4.2305984e-02]
 [-7.0332998e-01]
 [-6.8126684e-01]]


In [24]:
# Using a momentum optimizer
# Identical to the plain gradient descent version except for the optimizer
# that builds the training op.
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape((-1, 1)), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # Report the current MSE every 100 epochs, before taking the step
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
    print(best_theta)


Epoch 0 MSE =  4.317224
Epoch 100 MSE =  0.52569693
Epoch 200 MSE =  0.5244722
Epoch 300 MSE =  0.52434045
Epoch 400 MSE =  0.52432317
Epoch 500 MSE =  0.52432126
Epoch 600 MSE =  0.52432096
Epoch 700 MSE =  0.52432084
Epoch 800 MSE =  0.5243208
Epoch 900 MSE =  0.524321
[[ 2.0685577 ]
 [ 0.82962364]
 [ 0.11875248]
 [-0.26553544]
 [ 0.30570337]
 [-0.00450275]
 [-0.03932644]
 [-0.8998754 ]
 [-0.8705313 ]]


In [25]:
# 9.5 Feeding Data to the Training Algorithm

# To implement mini-batch gradient descent, X and y must be replaced with
# the next mini-batch at every iteration.
# The simplest way to do this is to use placeholder nodes.

# A placeholder node performs no computation; it only outputs the data you
# feed it at run time. The None dimension means "any number of rows".
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5

# feed_dict specifies the value of A for that evaluation
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val_2 = B.eval(feed_dict={A: [[1, 2, 3], [4, 5, 6]]})
    
    print(B_val_1)
    print(B_val_2)

[[6. 7. 8.]]
[[ 6.  7.  8.]
 [ 9. 10. 11.]]


In [26]:
# Mini-batch gradient descent: X and y become placeholders that are fed
# one mini-batch at a time
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

# Define the batch size and the number of batches per epoch
# (rounded up, so the last batch may be smaller than batch_size)
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

# None leaves the number of rows (the batch size) unspecified
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

def fetch_batch(epoch, batch_index, batch_size):
    """Return the `batch_index`-th sequential mini-batch as (features, targets).

    Rows are taken in storage order from the notebook-level
    `scaled_housing_data_plus_bias` array and from `housing.target`
    (reshaped to a column). The slice is clamped to `m`, so the last batch
    may hold fewer than `batch_size` rows. `epoch` is accepted but not used.
    """
    lo = batch_index * batch_size
    hi = min(m, lo + batch_size)
    rows = slice(lo, hi)
    features = scaled_housing_data_plus_bias[rows, :]
    targets = housing.target.reshape((-1, 1))[rows, :]
    return features, targets

def random_fetch_batch(epoch, batch_index, batch_size):
    """Return a randomly sampled mini-batch as (features, targets).

    NumPy's global random generator is re-seeded from the
    (epoch, batch_index) pair, so the same pair always produces the same
    batch. `batch_size` row indices are then drawn from [0, m) with
    replacement and used to index the notebook-level data and targets.
    """
    seed = epoch * n_batches + batch_index
    np.random.seed(seed)
    picked = np.random.randint(m, size=batch_size)
    features = scaled_housing_data_plus_bias[picked]
    targets = housing.target.reshape(-1, 1)[picked]
    return features, targets


theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # The per-epoch MSE report is disabled here: mse now depends on the
        # X and y placeholders, so mse.eval() would need a feed_dict.
        #if epoch % 100 == 0:
        #    print("Epoch", epoch, "MSE = ", mse.eval())
            
        # One training step per mini-batch, feeding the placeholders
        for batch_index in range(n_batches):
            X_batch, Y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: Y_batch})

    best_theta = theta.eval()
    print(best_theta)


[[ 2.002909  ]
 [ 0.7834712 ]
 [ 0.13612014]
 [-0.23013695]
 [ 0.27156723]
 [-0.00706379]
 [-0.0132116 ]
 [-0.86901027]
 [-0.89599955]]


In [27]:
# 9.6 Saving and Restoring Models
# Create a Saver node at the end of the construction phase, after all
# variable nodes have been created,
# then call its save() method during the execution phase.
# The model is the same mini-batch gradient descent as before.
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

# Define the batch size and the number of batches per epoch
# (rounded up, so the last batch may be smaller than batch_size)
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

def fetch_batch(epoch, batch_index, batch_size):
    """Slice out one sequential mini-batch of features and targets.

    The batch covers rows [batch_index * batch_size, +batch_size) of the
    notebook-level training data, truncated at `m` rows. Targets come from
    `housing.target` viewed as a single column. `epoch` is not used.
    """
    first_row = batch_index * batch_size
    last_row = first_row + batch_size
    if last_row > m:
        # Final batch: do not run past the end of the data set
        last_row = m
    features = scaled_housing_data_plus_bias[first_row:last_row]
    labels = housing.target.reshape((-1, 1))[first_row:last_row]
    return features, labels

def random_fetch_batch(epoch, batch_index, batch_size):
    """Draw a reproducible random mini-batch of features and targets.

    The global NumPy generator is seeded with a value derived from
    `epoch`, `n_batches` and `batch_index`; `batch_size` row indices in
    [0, m) are then sampled (with replacement) and the matching rows of the
    notebook-level data and column-shaped targets are returned.
    """
    np.random.seed(epoch * n_batches + batch_index)
    rows = np.random.randint(0, m, size=batch_size)
    features = np.take(scaled_housing_data_plus_bias, rows, axis=0)
    labels = np.take(housing.target.reshape(-1, 1), rows, axis=0)
    return features, labels


theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
# The Saver is created last, after every variable node exists
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # Checkpoint every 100 epochs; the same path is reused each time
        if epoch % 100 == 0:
            save_path = saver.save(sess, '/tmp/my_model.ckpt')
            
        for batch_index in range(n_batches):
            X_batch, Y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: Y_batch})
        
        
    best_theta = theta.eval()
    print(best_theta)
    # Save the final model under a separate name
    save_path = saver.save(sess, '/tmp/my_model_final.ckpt')


[[ 2.002909  ]
 [ 0.7834715 ]
 [ 0.13612019]
 [-0.23013733]
 [ 0.27156752]
 [-0.00706378]
 [-0.01321162]
 [-0.8690099 ]
 [-0.89599925]]


In [28]:
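# Restoring a model is just as easy: build the same graph, create a Saver,
# and at the start of the execution phase call saver.restore(sess, path)
# instead of running the init node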


In [30]:
# 9.7 Visualizing the Graph and Training Curves with TensorBoard
# The model is the same mini-batch gradient descent as before.
from datetime import datetime
import tensorflow as tf
import numpy as np

# Use a timestamped log directory so that every run of this cell writes to
# its own sub-directory of tf_logs instead of mixing with earlier runs.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

# Define the batch size and the number of batches per epoch
# (rounded up, so the last batch may be smaller than batch_size)
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

def fetch_batch(epoch, batch_index, batch_size):
    """Return mini-batch number `batch_index` as a (features, targets) pair.

    Batches are consecutive, non-shuffled row windows of the notebook-level
    training data; the window end is capped at `m`, so the final batch can
    be shorter than `batch_size`. The `epoch` argument is ignored.
    """
    offset = batch_index * batch_size
    window = (slice(offset, min(m, offset + batch_size)), slice(None))
    return (scaled_housing_data_plus_bias[window],
            housing.target.reshape((-1, 1))[window])

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

# Create a node that evaluates the MSE value and writes it to a
# TensorBoard-compatible binary log string called a summary
mse_summary = tf.summary.scalar('MSE', mse)

# Create a file writer used to write summaries (and the graph definition
# passed as second argument) to log files in logdir
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # The per-epoch MSE report is disabled here: mse depends on the
        # X and y placeholders, so mse.eval() would need a feed_dict.
        #if epoch % 100 == 0:
        #    print("Epoch", epoch, "MSE = ", mse.eval())
            
        for batch_index in range(n_batches):
            X_batch, Y_batch = fetch_batch(epoch, batch_index, batch_size)
            # Log the MSE of every 10th mini-batch (before training on it);
            # `step` is the total number of mini-batches processed so far.
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X:X_batch, y:Y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
                
            sess.run(training_op, feed_dict={X: X_batch, y: Y_batch})

    best_theta = theta.eval()
    print(best_theta)
    # Close the writer once training has finished
    file_writer.close()



[[ 2.002909  ]
 [ 0.7834715 ]
 [ 0.13612019]
 [-0.23013733]
 [ 0.27156752]
 [-0.00706378]
 [-0.01321162]
 [-0.8690099 ]
 [-0.89599925]]


In [31]:
# 9.8 Name Scopes
# Group related nodes under a name scope to reduce clutter in the graph.
# Ops created inside the block get the scope name as a prefix ("loss/").
# Note: this adds new error/mse nodes to the current graph, built on the
# y_pred and y nodes defined in the previous cell.
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")

print(error.op.name)
print(mse.op.name)

loss/sub
loss/mse


In [39]:
# Modularity

# Tensorflow will check if the name already exists
# If so, it will append _1 _2 to differentiate the names
# So the names will be relu relu_1 relu_2...
# But the code is clearer as we encapsulated the relu unit's creation

def relu(X):
    """Add one rectified linear unit to the graph and return its output.

    A fresh weight column (random normal, one row per column of X) and a
    scalar bias initialized to 0.0 are created on every call; the unit
    outputs max(X @ weights + bias, 0).
    """
    n_inputs = int(X.get_shape()[1])
    weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
    bias = tf.Variable(0.0, name="bias")
    linear = tf.matmul(X, weights)
    pre_activation = tf.add(linear, bias, name="z")
    activation = tf.maximum(pre_activation, 0., name="relu")
    return activation

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
# Build five independent ReLU units on the same input and sum their outputs
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # A single instance with three features, shaped (1, 3) to match X
    X_feed = np.array([1, 2, 3], dtype=np.float32).reshape((-1, 3))
    result = output.eval(feed_dict={X:X_feed})
    print(result)

[[13.446552]]


In [43]:
# Share variables
# you could create a variable and pass it around as parameters for
# others to use, but it will be difficult if there are too many such variables
# use get_variable() function to create the shared variable if it does not exist yet
# or reuse it if it already exists
# The creating and reusing is controlled by the current variable_scope
    
def relu(X):
    """Add one ReLU-like unit that clips at the shared `relu/threshold`.

    The threshold is looked up with get_variable() inside the "relu"
    variable scope in reuse mode, so it must already have been created with
    get_variable(); weights and bias are new tf.Variable objects per call.
    Returns max(X @ weights + bias, threshold).
    """
    with tf.variable_scope("relu") as scope:
        # Equivalent to passing reuse=True to variable_scope(). Once reuse
        # is switched on it cannot be switched off again within this scope,
        # and only variables created through get_variable() can be reused.
        scope.reuse_variables()
        threshold = tf.get_variable("threshold")
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
        bias = tf.Variable(0.0, name="bias")
        linear = tf.matmul(X, weights)
        pre_activation = tf.add(linear, bias, name="z")
        activation = tf.maximum(pre_activation, threshold, name="relu")
    return activation

n_features = 3

# Start from an empty default graph so this cell can be re-run: creating
# relu/threshold a second time in the same graph would raise a ValueError.
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")

# Create the shared threshold variable. The scope must NOT be opened with
# reuse=True here: in reuse mode get_variable() only looks up existing
# variables and raises a ValueError because relu/threshold does not exist
# yet. relu() itself switches the scope to reuse mode to fetch it.
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(),
                                initializer=tf.constant_initializer(0.0))

# The five relus all reuse the same relu/threshold variable
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # A single instance with three features, shaped (1, 3) to match X
    X_feed = np.array([1, 2, 3], dtype=np.float32).reshape((-1, 3))
    result = output.eval(feed_dict={X:X_feed})
    print(result)


[[3.0104785]]


In [None]:
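# Variables created with get_variable() are always named using the
# variable_scope name as a prefix (e.g. "relu/threshold").
# For all other nodes (including variables created with tf.Variable), the
# variable scope acts like a name scope: if the name is already taken, an
# _index suffix is appended to make it unique (relu_1/, relu_2/, ...).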