In [68]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "tensorflow"

def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

# Creating Your First Graph and Running It in a Session

In [69]:
import tensorflow as tf

reset_graph()

x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2

In [70]:
f

# The most important thing to understand is that this code does not actually perform any computation.
# It just creates a computation graph. Even the variables are not initialized yet. 
# To evaluate this graph, you need to open a Tensorflow session and use it to initialize the variables and evaluate f.

<tf.Tensor 'add_1:0' shape=() dtype=int32>

In [71]:
# The following code creates a session, initializes the variables, and evaluates, and f then closes the session(which frees up resources):

sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)

42


In [72]:
sess.close()

In [73]:
# Having to repeat sess.run() all the time is a bit cumbersome, but fortunately, there is a better way:

with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()


In [74]:
# Instead of manually running the initializer for every single variable, we can use the global_variables_initializer() function. This doesn't perform the initialization immediately, but rather creates a node in the graph that will initialize all variables when it is run:

init = tf.global_variables_initializer() # prepare an init node

with tf.Session() as sess:
    init.run()
    result = f.eval()

In [75]:
result

42

In [76]:
init = tf.global_variables_initializer()

In [77]:
# In Jupyter or Python you may prefer to create an InteractiveSession. 
# The only difference from a regular session is that when an InteractiveSession is created, it automatically sets itself as the default session, so you don't need to use a with block( But you do need to close the session manually when you are done with it.):

sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)

42


In [78]:
# A TensorFlow program is typically split into two parts: 
    # The first part builds a computation graph (this is called the construction phase)
    # The second part runs it (execution phase)
    
# The construction phase typically builds a computation graph representing the ML model and the computations required to train it.
# The execution phase generally runs a loop that evaluates a training step repeatedly, gradually improving the model parameters.

# Managing Graphs 

In [79]:
# Any node you create is automatically added to the default graph:

reset_graph()

x1 = tf.Variable(1)
x1.graph is tf.get_default_graph()

True

In [80]:
# In some cases this is fine, but sometimes you may want to manage multiple independent graphs. You can do this by creating a new Graph and temporarily making it the default graph inside a with block, like so:

graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)
    
x2.graph is graph

True

In [81]:
x2.graph is tf.get_default_graph()

False

# Lifecycle of a Node Value

In [82]:
# When you evaluate a node, TensorFlow automatically determines the set of nodes that it depends upon and it evaluates these nodes first.
# Consider the following code:

w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    print(y.eval()) # 10
    print(z.eval()) # 15
    
# First this code defines a very simple graph.
# Then it starts a session and runs the graph to evaluate y.
# Finally, it evaluates z
# Through this process, w and x are evaluated twice.


10
15


In [83]:
# Therefore if you want to evaluate y and z efficiently without evaluating both y and z each run, use the following code:

with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val) # 10
    print(z_val) # 15

10
15


# Linear Regression with Tensorflow

In [84]:
# Tensorflow operations (also called ops for short) can take any number of inputs and produce any number of outputs.
# For example, the addition and multiplication ops each take two inputs and produce one output.

# The inputs and outputs are multidimensional arrays called tensors(hence Tensorflow).

# In the preceding examples, the tensors just contained a single scalar value, but we can perform computations with arrays of any shape.


In [85]:
# The following code manipulates 2D arrays to perform Linear Regression of the California housing dataset:

# First it starts by fetching the dataset.
# Then it adds an extra bias input feauture (X0=1) to all training instances.
# Then it creates two TensorFlow constant nodes, X and y, to hold this data and the targets.
# Then it uses some of the matrix operations in Tensorflow to define theta.
# The computations aren't performed immediately... Only the nodes are created.
# Finally the code creates a session and uses it to evaluate theta.

import numpy as np
from sklearn.datasets import fetch_california_housing

reset_graph()

housing = fetch_california_housing()
m,n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

with tf.Session() as sess:
    theta_value = theta.eval()



In [86]:
theta_value

array([[-3.7465141e+01],
       [ 4.3573415e-01],
       [ 9.3382923e-03],
       [-1.0662201e-01],
       [ 6.4410698e-01],
       [-4.2513184e-06],
       [-3.7732250e-03],
       [-4.2664889e-01],
       [-4.4051403e-01]], dtype=float32)

In [87]:
# The main benefit of this code versus computing the Normal Equation directly using Numpy is that TensorFlow will automatically run this on your GPU card... Provided you have a GPU, and Tensorflow GPU installed
# Compare the ode above with the Numpy code equivalent:

from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(housing.data, housing.target.reshape(-1, 1))

print(np.r_[lin_reg.intercept_.reshape(-1, 1), lin_reg.coef_.T])

[[-3.69419202e+01]
 [ 4.36693293e-01]
 [ 9.43577803e-03]
 [-1.07322041e-01]
 [ 6.45065694e-01]
 [-3.97638942e-06]
 [-3.78654265e-03]
 [-4.21314378e-01]
 [-4.34513755e-01]]


# Implementing Gradient Descent 

In [88]:
# Let's use Batch Gradient Descent instead of the Normal Equation. 
# First we will do this by manually computingthe gradients.
# Then we will use Tensorflow's autodiff feature to let TensorFlow compute the gradients automatically.
# Finally we will use a couple of TensorFlow's out-of-box optimizers

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

In [89]:
print(scaled_housing_data_plus_bias.mean(axis=0))
print(scaled_housing_data_plus_bias.mean(axis=1))
print(scaled_housing_data_plus_bias.mean())
print(scaled_housing_data_plus_bias.shape)

[ 1.00000000e+00  6.60969987e-17  5.50808322e-18  6.60969987e-17
 -1.06030602e-16 -1.10161664e-17  3.44255201e-18 -1.07958431e-15
 -8.52651283e-15]
[ 0.38915536  0.36424355  0.5116157  ... -0.06612179 -0.06360587
  0.01359031]
0.11111111111111005
(20640, 9)


# Manually computing the gradients

In [90]:
# The following code should be fairly self-explanatory, except for a few new elements:

# The random_uniform() function creates a node in the graph that will generate a tensor containing random values.
# The reduce_mean() function creates a node that will compute the mean of its input tensor, just like Numpy's mean() function.
# The assign() function creates a node that will assign a new value to a variable.
# The main loop executes the training step over and over again(n_epochs times), and every 100 iterations it prints out the current MSE.

reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse =tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
        
    best_theta = theta.eval()
            

Epoch 0 MSE = 9.161543
Epoch 100 MSE = 0.7145007
Epoch 200 MSE = 0.5667047
Epoch 300 MSE = 0.5555716
Epoch 400 MSE = 0.5488116
Epoch 500 MSE = 0.54363626
Epoch 600 MSE = 0.53962916
Epoch 700 MSE = 0.53650916
Epoch 800 MSE = 0.5340678
Epoch 900 MSE = 0.53214705


In [91]:
best_theta

array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393812],
       [-0.04269557],
       [-0.6614528 ],
       [-0.63752776]], dtype=float32)

# Using autodiff

In [92]:
# Autodiff is a Tensorflow function that lets you automatically and efficiently compute gradients.
# What are gradients again? It's the rate of change of a dataset or function.
# There are four ways to automatically computing gradients. However Tensorflow uses reverse-mode autodiff, when there are many inputs but few outputs(As is the case with neural networks)

reset_graph()
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

In [93]:
gradients = tf.gradients(mse, [theta])[0]

In [94]:
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE=", mse.eval())
        sess.run(training_op)
    
    best_theta = theta.eval()
    
print("Best theta:")
print(best_theta)

Epoch 0 MSE= 9.161543
Epoch 100 MSE= 0.7145006
Epoch 200 MSE= 0.56670463
Epoch 300 MSE= 0.5555716
Epoch 400 MSE= 0.5488117
Epoch 500 MSE= 0.5436362
Epoch 600 MSE= 0.53962916
Epoch 700 MSE= 0.53650916
Epoch 800 MSE= 0.5340678
Epoch 900 MSE= 0.53214717
Best theta:
[[ 2.0685525 ]
 [ 0.8874027 ]
 [ 0.14401658]
 [-0.34770882]
 [ 0.36178368]
 [ 0.00393811]
 [-0.04269556]
 [-0.6614528 ]
 [-0.6375277 ]]


In [95]:
# How could you find the partial derivatives of the following function with regards to a and b?

def my_func(a, b):
    z = 0
    for i in range(100):
        z = a * np.cos(z + i) + z * np.sin(b - i)
    return z

In [96]:
my_func(0.2, 0.3)

-0.21253923284754914

In [97]:
reset_graph()

a = tf.Variable(0.2, name="a")
b = tf.Variable(0.3, name="b")
z = tf.constant(0.0, name="z0")
for i in range(100):
    z = a * tf.cos(z + i) + z * tf.sin(b - i)
    
grads = tf.gradients(z, [a, b])
init = tf.global_variables_initializer()
                            

In [98]:
# Let's compute the function at a=0.2 and b=0.3, and the partial derivatives at that point with regards to a and with regards to b:

with tf.Session() as sess:
    init.run()
    print(z.eval())
    print(sess.run(grads))

-0.21253741
[-1.1388494, 0.19671395]


# Using a GradientDescentOptimizer

In [99]:
# Tensorflow computes the gradients for you. But it also provides a number of optimizers.
# We can simply replace the preceding gradients = ... and training_op = ... lines with the code below.

reset_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

In [100]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

In [101]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
        
    best_theta = theta.eval()
    
print("Best theta:")
print(best_theta)

Epoch 0 MSE = 9.161543
Epoch 100 MSE = 0.7145006
Epoch 200 MSE = 0.56670463
Epoch 300 MSE = 0.5555716
Epoch 400 MSE = 0.5488117
Epoch 500 MSE = 0.5436362
Epoch 600 MSE = 0.53962916
Epoch 700 MSE = 0.53650916
Epoch 800 MSE = 0.5340678
Epoch 900 MSE = 0.53214717
Best theta:
[[ 2.0685525 ]
 [ 0.8874027 ]
 [ 0.14401658]
 [-0.34770882]
 [ 0.36178368]
 [ 0.00393811]
 [-0.04269556]
 [-0.6614528 ]
 [-0.6375277 ]]


# Using a momentum optimizer

In [102]:
reset_graph()

n_epochs = 1000
learning_rate= 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="X")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

In [103]:
# If you want to use a different type of optimizer, you just need to change one line.
# For example, you can use the momentum optimizer by defining the optimizer like this:

optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)

In [104]:
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [105]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        sess.run(training_op)
        
    best_theta = theta.eval()
    
print("Best theta:")
print(best_theta)

Best theta:
[[ 2.068558  ]
 [ 0.8296286 ]
 [ 0.11875337]
 [-0.26554456]
 [ 0.3057109 ]
 [-0.00450251]
 [-0.03932662]
 [-0.89986444]
 [-0.87052065]]


# Feeding data to the training algorithm

In [106]:
# Let's try to modify the previous code to implement Mini-batch Gradient Descent. For this, we need a way to replace X and y at every iteration with the next mini-batch.
# The simplest way to do this is to use placeholder nodes. These nodes don't perform computation, they just output data you tell them to output at runtime.

# To create a placeholder node, you must call the placeholder() function and specify the output tensor's data type.

reset_graph()

A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
    B_val_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 9, 9]]})
    
print(B_val_1)

[[6. 7. 8.]]


In [107]:
print(B_val_2)

[[ 9. 10. 11.]
 [12. 14. 14.]]


# Mini-batch Gradient Descent

In [108]:
# To implement Mini-batch Gradient Descent, we only need to tweak the existing code slightly. 
# First change the definition of X and y in the construction phase to make them plceholder nodes:

n_epochs = 1000
learning_rate = 0.01

In [109]:
reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

In [110]:
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()



In [111]:
n_epochs = 10

In [112]:
# Then define the batch size and compute the total number of batches:

batch_size = 100
n_batches = int(np.ceil(m / batch_size))

In [113]:
# Finally, in the execution phase, fetch the mini-batches one by one, then provide the value of X and y via the feed_dict parameter when evaluating a node that depends on either of them.

def fetch_batch(epoch, batch_index, batch_size):
    np.random.seed(epoch * n_batches + batch_index)
    indices = np.random.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            
    best_theta = theta.eval()

In [114]:
best_theta

array([[ 2.0703337 ],
       [ 0.8637145 ],
       [ 0.12255151],
       [-0.31211874],
       [ 0.38510373],
       [ 0.00434168],
       [-0.01232954],
       [-0.83376896],
       [-0.8030471 ]], dtype=float32)

# Saving and Restoring Models

In [115]:
# Once you've trained youe model you should save its parameters to disk so you can come back to it whenever you want, use it in another program, compare it to other models, etc..
# Moreover you probably want to save checkpoints at regular intervals.

# In Tensorflow, you can create a Saver node at the end of the construction phase; then, in the execution phase. just call its save() method whenever you want to save the model, passing the session and path of the checkpoint file:

reset_graph()

n_epochs = 1000 
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
# not shown
            save_path = saver.save(sess, "/tmp/my_model.ckpt")
        sess.run(training_op)
        
    best_theta = theta.eval()
    save_path = saver.save(sess, "/tmp/my_model_final.ckpt")

Epoch 0 MSE = 9.161543
Epoch 100 MSE = 0.7145006
Epoch 200 MSE = 0.56670463
Epoch 300 MSE = 0.5555716
Epoch 400 MSE = 0.5488117
Epoch 500 MSE = 0.5436362
Epoch 600 MSE = 0.53962916
Epoch 700 MSE = 0.53650916
Epoch 800 MSE = 0.5340678
Epoch 900 MSE = 0.53214717


In [116]:
best_theta

array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614528 ],
       [-0.6375277 ]], dtype=float32)

In [117]:
# Restoring a model is just as easy: you create a Saver at the end of the construction phase just like before.
# But then at the beginning of the execution phase, instead of initializing the variables using the init node, you call the restore() method of the Saver object:

with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt")
    best_theta_restored = theta.eval()

INFO:tensorflow:Restoring parameters from /tmp/my_model_final.ckpt


In [118]:
# By default a Saver saves and restores all variables under their own name.
# But if you need more control, you can specify which variables to save or restore, and what names to use.
# For example, the followin Saver will save or restore only the theta variable under the name weights:

saver = tf.train.Saver({"weights": theta})

In [119]:
np.allclose(best_theta, best_theta_restored)

True

In [120]:
# By default the save() method also saves the structure of the graph in a second file with the same name plus a .meta extension.
# You can load this graph structure using tf.train.import_meta_graph(). 
# This adds the graph to the default graph, and returns a Saver instance that you can then use to restore the graph's state:

reset_graph()

saver = tf.train.import_meta_graph("/tmp/my_model_final.ckpt.meta") # this loads the graph structure

theta = tf.get_default_graph().get_tensor_by_name("theta:0")

with tf.Session() as sess:
    saver.restore(sess, "/tmp/my_model_final.ckpt") # this restores the graph's state
    best_theta_restored = theta.eval()

INFO:tensorflow:Restoring parameters from /tmp/my_model_final.ckpt


In [121]:
np.allclose(best_theta, best_theta_restored)

True

# Visualizing the graph and Training Curves Using Tensorboard

In [122]:
# So now we have a computation graph that trains a Linear Regression model using Mini-batch Gradient Descent, and we are saving checkpoints at regular intervals.
# However, we are still relying on the print() function to visualize progress during training.

# However, we have a tool called Tensorboard. If we feed it some training stats, it will display nice interactive visualizations of these stats on your web browser.

# The first step is to tweak your program a bit so it writes the graph definition and training stats to a log directory that Tensorboard will read from:



# inside Jupyter

In [123]:
# To visualize the grpah within Jupyter, we will use a Tensorboard server available online.


from tensorflow_graph_in_jupyter import show_graph

show_graph(tf.get_default_graph())


ModuleNotFoundError: No module named 'tensorflow_graph_in_jupyter'

# Using Tensorboard

In [124]:
# The simplest solution for this is to include a timestamp in the log directory name. 
# Add the following code at the beginning of the program:

reset_graph()

from datetime import datetime

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

In [125]:
n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [126]:
# Next add the following code at the very end of the construction phase:

mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

# The first line creates a node in the graph that will evaluate the MSE value and write it to a Tensorboard-compatible binary log string called a summary.
# The second line creates a Filewriter that you will use to write summaries to logfiles in the log directory.

In [127]:
# Next we need to update the execution phase to evaluate the mse_summary node regularly during training. 
# This will output a summary that you can then write to the events file using file_writer. 
# Here is the updated code:

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            
    best_theta = theta.eval()
   



In [128]:
# Finally you want to close the Filewriter at the end of the program:

    file_writer.close()

In [129]:
# Now we run this program: it will create a log directory and write an events file in this directory, containing both the graph definition and the MSE values.

best_theta

array([[ 2.0714476 ],
       [ 0.8462012 ],
       [ 0.11558535],
       [-0.26835832],
       [ 0.32982782],
       [ 0.00608358],
       [ 0.07052915],
       [-0.87988573],
       [-0.8634251 ]], dtype=float32)

In [130]:
# Now we will open up a shell and go to our working directory, then type: ls -l tf_logs/run* to list the contents of the log directory
# It will show a new directory for each time you run a Tensroflow Process.

In [131]:
# Now its time to fire up the Tensorboard server by running the command in the shell: The directions are on page 246 of the book.


# Name Scopes

In [132]:
# When dealing with more complex models such as neural networks, the graph can easily become cluttered with thousands of nodes.
# To avoid this, you can create name scopes to group related nodes. For example, let's modify the previous code to define the "error" and "mse ops" within a name scope called "loss":

reset_graph()

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}".format(root_logdir, now)

n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

In [133]:
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")

In [134]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

mse_summary = tf.global_variables_initializer()

mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [135]:
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            
    best_theta = theta.eval()
    
file_writer.flush()
file_writer.close()
print("Best Theta:")
print(best_theta)
        

Best Theta:
[[ 2.0703337 ]
 [ 0.8637145 ]
 [ 0.12255151]
 [-0.31211874]
 [ 0.38510373]
 [ 0.00434168]
 [-0.01232954]
 [-0.83376896]
 [-0.8030471 ]]


In [136]:
# The name of each op defined within the scope is now prefixed with "loss/":

print(error.op.name)

loss/sub


In [137]:
print(mse.op.name)

loss/mse


In [138]:
reset_graph()

a1 = tf.Variable(0, name="a") # name == "a"
a2 = tf.Variable(0, name="a") # name == "a_1"

with tf.name_scope("param"): # name == "param"
    a3 = tf.Variable(0, name="a") # name == "param/a"
    
with tf.name_scope("param"): # name == "param_1"
    a4 = tf.Variable(0, name="a") # name == "param_1/a"
    
for node in (a1, a2, a3, a4):
    print(node.op.name)

a
a_1
param/a
param_1/a


# Modularity

In [139]:
# Suppose you want to create a graph that adds the output of two rectified linear units(ReLU).
# A Rectified Linear Unit computes a linear function of the inputs , and outputs the result if it is positive, and 0 if otherwise.
# The following code does this job but is quite repetitive:

# This is pretty ugly and flat code:
reset_graph()

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")

w1 = tf.Variable(tf.random_normal((n_features, 1)), name="weights1")
w2 = tf.Variable(tf.random_normal((n_features, 1)), name="weights2")
b1 = tf.Variable(0.0, name="bias1")
b2 = tf.Variable(0.0, name="bias2")

z1 = tf.add(tf.matmul(X, w1), b1, name="z1")
z2 = tf.add(tf.matmul(X, w2), b2, name="z2")

relu1 = tf.maximum(z1, 0., name="relu1")
relu2 = tf.maximum(z1, 0., name="relu2") # Cut and Paste error! Did you spot it?

output=tf.add(relu1, relu2, name="output")

# This kind of repetitive code is hard to maintain and error-prone. In fact it contains a cut and paste error.
# It would become even worse if you wanted to add a few more RELUs. 
# Fortunately, Tensorflow let's you stay DRY(don't repeat yourself):
    # simply create a function to build a RELU.



In [140]:
# The following code creates 5 simple RELUs and outputs their sum(note that add_n() creates an operation that will compute the sum of a list of tensors):

reset_graph()

def relu(X):
    with tf.name_scope("relu"):
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, 0., name="max")
    
# Note that when you create a node, TensorFlow checks whether its name already exists, and if it does it appends an underscore followed by an index to make the name unique.
# Using Name Scopes, you can make the graph much clearer. Simply move all the content of the relu() function inside a name scope.
# Notice that TensorFlow also gives the name scopes unique names by appending _1, _2, and so on.

# def relu(X):
    # with tf.name_scope("relu"):
      #  [.....]

In [141]:
n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")

file_writer = tf.summary.FileWriter("logs/relu2", tf.get_default_graph())
file_writer.close()

# Sharing Variables

In [143]:
# If you want to share a variable between various components of your graph, one simple option is to create it first and then pass it as a parameter to the functions that need it.
# For example, suppose you want to control the ReLU threshold(currently hardcoded to 0) using a shared threshold variable for all ReLUs.
# You could just create that variable first and then pass it to the relu() function:

reset_graph()

def relu(X, threshold):
    with tf.name_scope("relu"):
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, threshold, name="max")
    
threshold = tf.Variable(0.0, name="threshold")
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X, threshold) for i in range(5)]
output = tf.add_n(relus, name="output")

# This works fine: Now you can conteol the threshold for all ReLUs using the threshold variable.
# However, if there are many shared parameters such as this one, it will be painful to hav to pass them around as parameters all the time.
# Many people create a Python Dictionary containing all the variables in their model, and pass it around every function.


In [147]:
# Others create a class for each module. Yet another option is to set the shared variable as an attribute of the relu() function upon the first call, like so:

reset_graph()

def relu(X):
    with tf.name_scope("relu"):
        if not hasattr(relu, "threshold"):
            relu.threshold = tf.Variable(0.0, name="threshold")
        w_shape = int(X.get_shape()[1]), 1                          # not shown in the book
        w = tf.Variable(tf.random_normal(w_shape), name="weights")  # not shown
        b = tf.Variable(0.0, name="bias")                           # not shown
        z = tf.add(tf.matmul(X, w), b, name="z")                    # not shown
        return tf.maximum(z, relu.threshold, name="max")
        

In [148]:
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")

In [149]:
# TensorFlow offers another option, which may lead to slightly cleaner and more modular code than the previous solutions.
# The idea is to use the get_variable() function to create a shared variable if it doesn't exist yet, or to reuse it if it already exists.

# The desired behavior(creating or reusing) is controlled by an attribute of the current_variable_scope().

# The following code will create a variable named "relu/threshold":

reset_graph()
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))

In [150]:
# Note that is the variable has already been created by an earlier call to get_variable(), this code will raise an exception.
# This behavior prevents reusing varibles by mistake.
# If you want to reuse a variable, you need to explicitly say so by setting the variable scope's reuse attribute to True:

with tf.variable_scope("relu", reuse=True):
    threshold = tf.get_variable("threshold")
    
# This code will fetch an existing "relu/threshold" variable, or raise an exception if it doesn't exist or if it wasn't created using get_variable().

In [151]:
# Alternatively, you can set the reuse attribute to True inside the block by calling the scope's reuse_variables() method:

with tf.variable_scope("relu") as scope:
    scope.reuse_variables()
    threshold = tf.get_variable("threshold")

In [154]:
# Now you have all the pieces you need to make the relu() function access the threshold variable without having to pass it as a parameter:

reset_graph()

def relu(X):
    with tf.variable_scope("relu", reuse=True):
        threshold = tf.get_variable("threshold")
        w_shape = int(X.get_shape()[1]), 1
        
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        
        b = tf.Variable(0.0, name="bias")
        
        z = tf.add(tf.matmul(X, w), b, name="z")
        
        return tf.maximum(z, threshold, name="max")
    
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
with tf.variable_scope("relu"):
    threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))
relus = [relu(X) for relu_index in range(5)]
output = tf.add_n(relus, name="output")

file_writer = tf.summary.FileWriter("logs/relu6", tf.get_default_graph())
file_writer.close()

In [157]:
# The code abover first defines the relu() function, then creates the relu/threshold variable and builds five ReLUs by calling the relu() function.
# It is unfortunate that the threshold variable must be defined outside the relu() function where all the rest of the ReLU code resides.
# To fix this, the code creates the threshold variable within the relu() function upon the first call, then it reuses it in subsequent calls.
# Now the ReLU function doesn't have to worry about name scopes or variable sharing: it just calls get_variable(), which will create or reuse the threshold variable.
# The rest of the code calls relu() five times, making sure to set reuse=None on the first call, and reuse=True for the other calls:

reset_graph()

def relu(X):
    with tf.variable_scope("relu"):
        threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))
        w_shape = (int(X.get_shape()[1]), 1)
        w = tf.Variable(tf.random_normal(w_shape), name="weights")
        b = tf.Variable(0.0, name="bias")
        z = tf.add(tf.matmul(X, w), b, name="z")
        return tf.maximum(z, threshold, name="max")
    
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
with tf.variable_scope("", default_name="") as scope:
    first_relu = relu(X)
    scope.reuse_variables()
    relus = [first_relu] + [relu(X) for i in range(4)]
output = tf.add_n(relus, name="output")

file_writer = tf.summary.FileWriter("logs/relu8", tf.get_default_graph())
file_writer.close()
    

In [158]:
# I guess this is another way to to write the above code:

reset_graph()

def relu(X):
    threshold = tf.get_variable("threshold", shape=(), initializer=tf.constant_initializer(0.0))
    w_shape = (int(X.get_shape()[1]), 1)
    w = tf.Variable(tf.random_normal(w_shape), name="weights")
    b = tf.Variable(0.0, name="bias")
    z = tf.add(tf.matmul(X, w), b, name="z")
    return tf.maximum(z, threshold, name="max")

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
relus = []
for relu_index in range(5):
    with tf.variable_scope("relu", reuse=(relu_index >= 1)) as scope:
        relus.append(relu(X))
output = tf.add_n(relus, name="output")

In [159]:
file_writer = tf.summary.FileWriter("logs/relu9", tf.get_default_graph())
file_writer.close()