In [15]:
import tensorflow as tf

# Build the graph: two variables and a node f = x^2 * y + y + 2.
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")

f = x*x*y + y + 2

# The above code does no computation,
# it only creates a computation graph

# Create a session to run the computation.
# A session holds resources, so close it in a finally block: that way it is
# released even if one of the run() calls below raises.
sess = tf.Session()
try:
    # Variables must be initialized in this session before f is evaluated.
    sess.run(x.initializer)
    sess.run(y.initializer)
    result = sess.run(f)
    print(result)
finally:
    sess.close()

42


In [25]:
# Or we can open the session in a with statement;
# then sess will be set as the default session inside the block, so
# op.run() / tensor.eval() are equivalent to calling tf.get_default_session().run(...)
# eval() method is called on a node to evaluate its value
#
# NOTE(review): the AttributeError recorded below this cell is consistent with
# the cell having been executed (In [25]) after the cell that rebinds x and y
# to plain tensors (x = w + 2, y = x + 5). Run top to bottom, x and y are
# still the tf.Variable objects created in the first cell.

with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
    print(result)

# And the session is closed automatically at the end of the with block

AttributeError: 'Tensor' object has no attribute 'initializer'

In [17]:
# global_variables_initializer() can be used to
# initialize all variables in one pass

init = tf.global_variables_initializer() # prepare an init node

with tf.Session() as sess:
    init.run()  # running the init node initializes the variables in this session
    print(f.eval())

42


In [18]:
# InteractiveSession: automatically sets itself as the default session,
# so run() / eval() can be called without a with block

sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close()  # no with block here, so the session has to be closed by hand

42


In [19]:
# 9.1 Managing Graphs

# Any node you create is automatically added to the default graph
x1 = tf.Variable(1)
# Identity check: the variable's graph is the current default graph
x1.graph is tf.get_default_graph()

True

In [20]:
# Create a separate graph and make it the default graph inside the with block
graph = tf.Graph()
with graph.as_default():
    x2 = tf.Variable(2)  # created while `graph` is the default, so it belongs to `graph`

x2.graph is graph


True

In [21]:
# Outside the with block, `graph` is no longer the default graph
x2.graph is tf.get_default_graph()

False

In [22]:
# You can call tf.reset_default_graph() to reset the default graph,
# discarding all nodes that were added to the original default graph
tf.reset_default_graph()

In [24]:
# 9.2 Lifecycle of a Node Value
# When you evaluate a node, TensorFlow automatically determines the
# nodes it depends on and evaluates those nodes first
#
# NOTE: this cell rebinds x and y, which the earlier cells used for
# tf.Variable objects; after it runs they are plain tensors.
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3

with tf.Session() as sess:
    print(y.eval())  # first graph run: evaluates w, then x, then y
    print(z.eval())  # second graph run: w and x are evaluated again

10
15


In [27]:
# Node values are dropped between graph runs;
# only variable values are kept
# A Variable starts its life when its initializer is run and ends when the session is closed
# If you don't want to evaluate w and x twice, you must ask TensorFlow to evaluate both y
# and z in just one graph run:

with tf.Session() as sess:
    y_val, z_val = sess.run([y, z])  # both fetched in a single graph run
    print(y_val)
    print(z_val)

10
15


In [28]:
# 9.3 Linear Regression with TensorFlow
# TensorFlow operations can take any number of inputs and produce any number of outputs
# Multi-dimensional array inputs/outputs are called tensors
# Like NumPy arrays, tensors have a shape and a type
# In the Python API, tensors are simply represented by NumPy ndarrays

import numpy as np
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
m, n = housing.data.shape  # m = number of rows (samples), n = number of columns (features)
print(m, n)

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /home/yucdong/scikit_learn_data


20640 8


In [30]:
# Prepend a column of ones (the bias input) to the 2-D feature matrix.
housing_data_plus_bias = np.hstack([np.ones((m, 1)), housing.data])
print(housing_data_plus_bias.shape)

(20640, 9)


In [32]:
# Normal equation, theta = (X^T X)^-1 X^T y, expressed as graph nodes.
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape((-1, 1)), dtype=tf.float32, name="y")
XT = tf.transpose(X)
gram_inverse = tf.matrix_inverse(tf.matmul(XT, X))
pseudo_inverse = tf.matmul(gram_inverse, XT)
theta = tf.matmul(pseudo_inverse, y)

# Nothing is computed until theta is evaluated inside a session.
with tf.Session() as sess:
    theta_value = theta.eval()
    print(theta_value)

[[-3.7185181e+01]
 [ 4.3633747e-01]
 [ 9.3952334e-03]
 [-1.0711310e-01]
 [ 6.4479220e-01]
 [-4.0338000e-06]
 [-3.7813708e-03]
 [-4.2348403e-01]
 [-4.3721911e-01]]


In [33]:
# TensorFlow will automatically run this on your GPU if you have one
# and have installed the GPU version of TensorFlow

In [46]:
# 9.4 Implementing Gradient Descent
# Use TensorFlow's autodiff feature to compute gradients automatically
# Remember to normalize the features before doing gradient descent, otherwise
# the process could be very slow; StandardScaler is a very handy tool for this

# First, manually computing the gradient
from sklearn.preprocessing import StandardScaler

housing_data = housing.data

# NOTE! don't include the bias column of ones when you do the scaling;
# it is added back only after the features have been scaled
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing_data)  # fit, then transform, in one call
bias_column = np.ones((m, 1))
scaled_housing_data_plus_bias = np.c_[bias_column, scaled_housing_data]
scaled_housing_data_plus_bias

array([[ 1.        ,  2.34476576,  0.98214266, ..., -0.04959654,
         1.05254828, -1.32783522],
       [ 1.        ,  2.33223796, -0.60701891, ..., -0.09251223,
         1.04318455, -1.32284391],
       [ 1.        ,  1.7826994 ,  1.85618152, ..., -0.02584253,
         1.03850269, -1.33282653],
       ...,
       [ 1.        , -1.14259331, -0.92485123, ..., -0.0717345 ,
         1.77823747, -0.8237132 ],
       [ 1.        , -1.05458292, -0.84539315, ..., -0.09122515,
         1.77823747, -0.87362627],
       [ 1.        , -0.78012947, -1.00430931, ..., -0.04368215,
         1.75014627, -0.83369581]])

In [47]:
# Batch gradient descent with the gradient written out by hand
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape((-1, 1)), dtype=tf.float32, name="y")

# theta: one weight per column of X (bias + n features), randomly initialized
# between -1.0 and 1.0; no seed is passed, so each run starts from different values
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# Gradient of the MSE with respect to theta: 2/m * X^T (X theta - y)
gradients = 2/m * tf.matmul(tf.transpose(X), error)
# One gradient descent step: theta <- theta - learning_rate * gradients
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # Report the current MSE every 100 epochs, before taking the step
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
    print(best_theta)

Epoch 0 MSE =  16.035032
Epoch 100 MSE =  0.95619637
Epoch 200 MSE =  0.703
Epoch 300 MSE =  0.65488666
Epoch 400 MSE =  0.6219415
Epoch 500 MSE =  0.5976489
Epoch 600 MSE =  0.5796423
Epoch 700 MSE =  0.5662488
Epoch 800 MSE =  0.5562509
Epoch 900 MSE =  0.5487576
[[ 2.0685523 ]
 [ 0.876067  ]
 [ 0.16684367]
 [-0.2796205 ]
 [ 0.28495544]
 [ 0.01238902]
 [-0.04438478]
 [-0.50569004]
 [-0.4781877 ]]


In [48]:
# Using autodiff
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape((-1, 1)), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

# The gradients function takes an op (in this case mse) and a list of variables
# (in this case just theta), and it creates a list of ops to compute the gradients
# of the op with regard to each variable; [0] picks the gradient for theta
gradients = tf.gradients(mse, [theta])[0]
# One gradient descent step: theta <- theta - learning_rate * gradients
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # Report the current MSE every 100 epochs, before taking the step
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
    print(best_theta)

Epoch 0 MSE =  9.565645
Epoch 100 MSE =  0.88104236
Epoch 200 MSE =  0.68769765
Epoch 300 MSE =  0.6475845
Epoch 400 MSE =  0.6193192
Epoch 500 MSE =  0.5979511
Epoch 600 MSE =  0.58168715
Epoch 700 MSE =  0.5692466
Epoch 800 MSE =  0.55968124
Epoch 900 MSE =  0.55228907
[[ 2.0685525 ]
 [ 0.920448  ]
 [ 0.16836335]
 [-0.37619278]
 [ 0.37040687]
 [ 0.01236582]
 [-0.04545964]
 [-0.45293707]
 [-0.4312157 ]]


In [49]:
# tensorflow uses reverse-mode autodiff
# good when there are many inputs and few outputs, as is often the case in
# neural networks

In [50]:
# Using an optimizer
# (the optimizer builds the training op from mse; no hand-written update rule)
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape((-1, 1)), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# minimize(mse) returns the op that performs one training step
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # Report the current MSE every 100 epochs, before taking the step
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
    print(best_theta)


Epoch 0 MSE =  7.1315026
Epoch 100 MSE =  0.651013
Epoch 200 MSE =  0.56375724
Epoch 300 MSE =  0.5549118
Epoch 400 MSE =  0.54890764
Epoch 500 MSE =  0.54416835
Epoch 600 MSE =  0.540398
Epoch 700 MSE =  0.5373849
Epoch 800 MSE =  0.5349668
Epoch 900 MSE =  0.53301865
[[ 2.0685525 ]
 [ 0.90302134]
 [ 0.14349097]
 [-0.3836767 ]
 [ 0.3943668 ]
 [ 0.003544  ]
 [-0.04298412]
 [-0.65061617]
 [-0.6288214 ]]


In [52]:
# Using momentum optimizer
# (same graph as the optimizer cell; only the optimizer class changes)
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape((-1, 1)), dtype=tf.float32, name="y")

theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
# minimize(mse) returns the op that performs one training step
optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # Report the current MSE every 100 epochs, before taking the step
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()
    print(best_theta)


Epoch 0 MSE =  7.447088
Epoch 100 MSE =  0.5324077
Epoch 200 MSE =  0.52472115
Epoch 300 MSE =  0.5243629
Epoch 400 MSE =  0.5243261
Epoch 500 MSE =  0.52432156
Epoch 600 MSE =  0.524321
Epoch 700 MSE =  0.52432084
Epoch 800 MSE =  0.5243208
Epoch 900 MSE =  0.52432096
[[ 2.0685577 ]
 [ 0.82962567]
 [ 0.11875283]
 [-0.2655388 ]
 [ 0.3057062 ]
 [-0.00450268]
 [-0.0393265 ]
 [-0.89987135]
 [-0.8705273 ]]


In [53]:
# 9.5 Feeding data to the Training Algorithm

# Modify the code to use mini-batches:
# each iteration uses the next mini-batch
# The best way to do this is to use placeholder nodes

# Placeholder nodes only output the data fed to them; they do no computation
# shape=(None, 3): 3 columns; the first dimension is left unspecified
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5

# feed_dict specifies the value of A
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1, 2, 3]]})              # one row
    B_val_2 = B.eval(feed_dict={A: [[1, 2, 3], [4, 5, 6]]})   # two rows
    
    print(B_val_1)
    print(B_val_2)

[[6. 7. 8.]]
[[ 6.  7.  8.]
 [ 9. 10. 11.]]


In [60]:
# Mini-batch gradient descent: X and y are placeholders fed one batch at a time
tf.reset_default_graph()

n_epochs = 1000
learning_rate = 0.01

# Define the batch size and the number of batches per epoch
# (ceil so that a final, smaller batch is counted when m is not a multiple of batch_size)
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

# The first dimension is None so batches with any number of rows can be fed
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

def fetch_batch(epoch, batch_index, batch_size):
    """Return the batch_index-th consecutive slice of the training data.

    Args:
        epoch: accepted for signature parity with random_fetch_batch; not used.
        batch_index: zero-based index of the batch within the epoch.
        batch_size: number of rows per batch; the last batch may be shorter.

    Returns:
        (X_batch, Y_batch): matching rows of scaled_housing_data_plus_bias and
        of housing.target reshaped to a single column.
    """
    first_row = batch_index * batch_size
    last_row = min(m, first_row + batch_size)
    rows = slice(first_row, last_row)
    targets = housing.target.reshape((-1, 1))
    return scaled_housing_data_plus_bias[rows, :], targets[rows, :]

def random_fetch_batch(epoch, batch_index, batch_size):
    """Return a reproducible random mini-batch of the training data.

    Row indices are drawn uniformly from [0, m) and may repeat within a batch.
    The draw is seeded from (epoch, batch_index), so the same pair always
    yields the same batch.

    Args:
        epoch: zero-based epoch number.
        batch_index: zero-based index of the batch within the epoch.
        batch_size: number of rows to draw.

    Returns:
        (X_batch, y_batch): the selected rows of scaled_housing_data_plus_bias
        and of housing.target reshaped to a single column.
    """
    # Use a private generator instead of np.random.seed(): it is seeded with
    # the same value, but does not reset NumPy's global random state as a
    # side effect of every call.
    rng = np.random.RandomState(epoch * n_batches + batch_index)
    indices = rng.randint(m, size=batch_size)
    X_batch = scaled_housing_data_plus_bias[indices]
    y_batch = housing.target.reshape(-1, 1)[indices]
    return X_batch, y_batch


theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # NOTE(review): progress print left disabled. mse depends on the
        # placeholders X and y, so mse.eval() needs a feed_dict to run here
        # (presumably why it was commented out).
        #if epoch % 100 == 0:
        #    print("Epoch", epoch, "MSE = ", mse.eval())
            
        # One training step per mini-batch, visiting the batches in order
        for batch_index in range(n_batches):
            X_batch, Y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: Y_batch})

    best_theta = theta.eval()
    print(best_theta)


[[ 2.002909  ]
 [ 0.7834715 ]
 [ 0.13612019]
 [-0.23013733]
 [ 0.27156752]
 [-0.00706378]
 [-0.01321162]
 [-0.8690099 ]
 [-0.89599925]]
