In [1]:
# Common imports
import numpy as np
import tensorflow as tf

# To make the output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

In [2]:
tf.__version__

'1.9.0'

## 9.1 Creating First Graph and Running It in a Session

In [3]:
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2

This code does not actually perform any computationj. It just creates a **computation graph**. In fact the variables are not initialized yet.

To evaluate this graph, u need to open a Tensorflow **session** and use it initialize the variables and evaluate f.

+ Way 1:

In [4]:
# Create a session
sess = tf.Session()

In [5]:
sess

<tensorflow.python.client.session.Session at 0x2605b0df208>

In [6]:
# Initialize the variable x
sess.run(x.initializer)

In [7]:
# Initialize the variable y
sess.run(y.initializer)

In [8]:
# Evaluate
result = sess.run(f)

In [9]:
result

42

In [10]:
# Close the session
sess.close()

+ Way 2:

In [11]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

> The session is automatically closed at the end of the block.

In [12]:
result

42

+ way 3:

使用函数`tf.global_variables_initializer()`，不需要手动地对每一个variable进行initializer。它不是立即执行initializer，而是creates a node in the graph that will initialize all variables when it is run。

In [13]:
init = tf.global_variables_initializer()  # Prepare an init node

In [14]:
init

<tf.Operation 'init' type=NoOp>

In [15]:
with tf.Session() as sess:
    init.run()  # actually initialize all the variables
    result = f.eval()

+ way 4:

Inside Jupyter or within a Python shell u may prefer to create an `InteractiveSession`. The only difference from a regular Session is that **when an InteractiveSession is created it automatically sets itself as the default session**, so u don't need a block (but u do need to close the session manually).

In [16]:
sess = tf.InteractiveSession()
init.run()

In [17]:
result = f.eval()
result

42

In [18]:
sess.close()

**A TensorFlow program is typically split into two parts:**
1. **construction phase**: builds a computation graph representing the ML model and the computations required to train it.
2. **execution phase**: generally runs a loop that evaluates a training step repeatedly (for example, one step per mini-batch), gradually improving the model params.

## 9.2 Managing Graphs

Any node u create is automatically added to the default graph:

In [19]:
x1 = tf.Variable(1)
x1

<tf.Variable 'Variable:0' shape=() dtype=int32_ref>

In [20]:
x1.graph is tf.get_default_graph()

True

In [21]:
graph = tf.Graph()

In [22]:
with graph.as_default():
    x2 = tf.Variable(2)

In [23]:
x2.graph

<tensorflow.python.framework.ops.Graph at 0x2605c0e8278>

In [24]:
x2.graph is graph

True

In [25]:
x2.graph is tf.get_default_graph()

False

In [26]:
tf.get_default_graph()

<tensorflow.python.framework.ops.Graph at 0x2605b0df6a0>

### Lifecycle of a Node Value

When u evaluate a node, TF automatically determines the set of nodes that it depends on and it evaluates those nodes first.

In [27]:
x = tf.constant(1, shape=(2, 3))
with tf.Session() as sess:
    print(x.eval())

[[1 1 1]
 [1 1 1]]


In [28]:
x = tf.constant([1, 2, 3, 4, 5, 6], shape=(2, 3))
with tf.Session() as sess:
    print(x.eval())

[[1 2 3]
 [4 5 6]]


In [29]:
x = tf.constant([1, 2, 3, 4, 5], shape=(2, 3))
with tf.Session() as sess:
    print(x.eval())

[[1 2 3]
 [4 5 5]]


In [30]:
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

In [31]:
with tf.Session() as sess:
    print(y.eval())
    print(z.eval())

10
15


> evaluate node z时不会使用evaluate node y时x和w的计算值，会重新计算x和w。**会两次evaluate w和x。**

有效地计算y和z，不会两次evaluate w和x，should must ask TF to evaluate both y and z in just one graph run。

In [32]:
with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])
    print(y_val)
    print(z_val)

10
15


## 9.3 Linear Regression with TensorFlow

TensorFlow operations (ops) can take any number of inputs and produce any number of outputs.

Constants and variables take no input. (They are called **source ops**)

The inputs and outputs are multidimensional arrays, called **tensors** (hence the name 'tensor flow').

In [33]:
from sklearn.datasets import fetch_california_housing

In [34]:
# Fetch the housing dataset
housing = fetch_california_housing()
m, n = housing.data.shape  # (20640, 8)
# Add an extra bias input feature(x_0=1) to all training instances
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [35]:
# Create nodes in the graph (don't perform any computation immeadiately)

# Create two tensorflow constant nodes to hold
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

In [36]:
# Perform nodes when the graph is run
with tf.Session() as sess:
    theta_value = theta.eval()
    print(theta.graph)

<tensorflow.python.framework.ops.Graph object at 0x000002605B0DF6A0>


In [37]:
theta_value

array([[-3.7185181e+01],
       [ 4.3633747e-01],
       [ 9.3952334e-03],
       [-1.0711310e-01],
       [ 6.4479220e-01],
       [-4.0338000e-06],
       [-3.7813708e-03],
       [-4.2348403e-01],
       [-4.3721911e-01]], dtype=float32)

> The main benefit of this code versus computing the Normal Equation directly using NumPy is that TensorFlow will automatically run this on your GPU if u have one).

## 9.4 Implementing Gradient Descent

### 9.4.1 Manually Computing the Gradients

In [38]:
# Normalize the input feature vectors
from sklearn.preprocessing import StandardScaler

std_scaler = StandardScaler()
housing_data_scaled = std_scaler.fit_transform(housing.data)
housing_data_plus_bias_scaled = np.c_[np.ones((m, 1)), housing_data_scaled]

In [39]:
reset_graph()

In [40]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(housing_data_plus_bias_scaled, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n+1, 1], -1., 1., seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

# Create a node in the graph that will initialize all variables when it is run
# do not actually perform the initialization immediately
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)  # actually initialize all the variables
    
    for epoch in range(n_epochs):
        # Print out the current mse every 100 iterations
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)
        
    best_theta = theta.eval()

Epoch 0 MSE = 9.161543
Epoch 100 MSE = 0.71450067
Epoch 200 MSE = 0.5667049
Epoch 300 MSE = 0.5555719
Epoch 400 MSE = 0.5488112
Epoch 500 MSE = 0.5436362
Epoch 600 MSE = 0.5396294
Epoch 700 MSE = 0.53650916
Epoch 800 MSE = 0.5340678
Epoch 900 MSE = 0.5321474


In [41]:
best_theta

array([[ 2.0685523 ],
       [ 0.8874027 ],
       [ 0.14401656],
       [-0.34770885],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.66145283],
       [-0.6375278 ]], dtype=float32)

### 9.4.2 Using autodiff

In [42]:
reset_graph()

In [43]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(housing_data_plus_bias_scaled, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform(
    [n+1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Automatically and efficiently compute the gradients
gradients = tf.gradients(mse, [theta])[0]
training_op = tf.assign(theta, theta - learning_rate * gradients)

In [44]:
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)  # Initialize
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch:", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

Epoch: 0 MSE = 9.161543
Epoch: 100 MSE = 0.7145006
Epoch: 200 MSE = 0.566705
Epoch: 300 MSE = 0.5555719
Epoch: 400 MSE = 0.5488112
Epoch: 500 MSE = 0.5436362
Epoch: 600 MSE = 0.5396294
Epoch: 700 MSE = 0.5365092
Epoch: 800 MSE = 0.5340678
Epoch: 900 MSE = 0.5321474


In [45]:
best_theta

array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614528 ],
       [-0.6375277 ]], dtype=float32)

### 9.4.3 Using an Optimizer

In [46]:
reset_graph()

In [47]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(housing_data_plus_bias_scaled, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform(
    [n+1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

# Use a Gradient Descent optimizer
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                       momentum=0.9)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)  # Initialize
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch:", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

print("\nBest theta:")
best_theta

Epoch: 0 MSE = 9.161543
Epoch: 100 MSE = 0.53056407
Epoch: 200 MSE = 0.5250113
Epoch: 300 MSE = 0.52441096
Epoch: 400 MSE = 0.52433306
Epoch: 500 MSE = 0.52432257
Epoch: 600 MSE = 0.52432126
Epoch: 700 MSE = 0.52432096
Epoch: 800 MSE = 0.52432096
Epoch: 900 MSE = 0.52432096

Best theta:


array([[ 2.068558  ],
       [ 0.8296286 ],
       [ 0.11875337],
       [-0.26554456],
       [ 0.3057109 ],
       [-0.00450251],
       [-0.03932662],
       [-0.89986444],
       [-0.87052065]], dtype=float32)

### 9.4.5 Using a momentum optimizer

It is often converges much faster than Gradient Descent.

In [48]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(housing_data_plus_bias_scaled, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform(
    [n+1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

# Use a momentum optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)  # Initialize
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch:", epoch, "MSE =", mse.eval())
        sess.run(training_op)
    best_theta = theta.eval()

print("\nBest theta:")
best_theta

Epoch: 0 MSE = 9.161543
Epoch: 100 MSE = 0.7145006
Epoch: 200 MSE = 0.566705
Epoch: 300 MSE = 0.5555719
Epoch: 400 MSE = 0.5488112
Epoch: 500 MSE = 0.5436362
Epoch: 600 MSE = 0.5396294
Epoch: 700 MSE = 0.5365092
Epoch: 800 MSE = 0.5340678
Epoch: 900 MSE = 0.5321474

Best theta:


array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614528 ],
       [-0.6375277 ]], dtype=float32)