### Setup

In [3]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Start over with an empty default graph and fixed random seeds.

    Clears TensorFlow's default graph, then seeds both NumPy's global RNG
    and TensorFlow (via ``tf.set_random_seed``) with ``seed``.

    NOTE: relies on the kernel-level name ``tf``; TensorFlow must have been
    imported before the first call.
    """
    tf.reset_default_graph()
    np.random.seed(seed)
    tf.set_random_seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Font sizes used for axis labels and tick labels on every figure
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures: save_fig() writes to
# <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "tensorflow"

def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG.

    The file is written to
    ``<PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png``. The target
    directory is created first if it does not exist yet.

    Args:
        fig_id: File name of the figure, without the ``.png`` extension.
        tight_layout: If true, call ``plt.tight_layout()`` before saving.
    """
    image_dir = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
    # savefig does not create missing directories. An isdir check is used
    # instead of makedirs(exist_ok=True) to keep Python 2 support.
    if not os.path.isdir(image_dir):
        os.makedirs(image_dir)
    path = os.path.join(image_dir, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

### GPU Test

In [4]:
import tensorflow as tf
# Pin a small matrix product to the first GPU; running it in the session
# below shows whether TensorFlow can execute ops on that device.
with tf.device('/gpu:0'):
    a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
    b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
    c = tf.matmul(a, b)  # (2, 3) x (3, 2) -> (2, 2)

with tf.Session() as sess:
    print (sess.run(c))

[[22. 28.]
 [49. 64.]]


### Fetch and normalize some data to play with

In [4]:
import numpy as np
from sklearn.datasets import fetch_california_housing

reset_graph()

# Load the California housing dataset through scikit-learn
housing = fetch_california_housing()
m, n = housing.data.shape  # m rows (instances), n columns (features)
# Prepend a column of ones so the bias can be learned as part of theta
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]


from sklearn.preprocessing import StandardScaler
# Scale the features with StandardScaler, then prepend the same column of ones
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]

### Running, saving, restoring a linear regression model

#### Saving

In [6]:
reset_graph()

n_epochs = 1000
learning_rate = 0.01

# Checkpoint locations do not change per epoch, so build them once.
periodic_ckpt_path = os.path.join(PROJECT_ROOT_DIR, "tmp/my_model.ckpt")
final_ckpt_path = os.path.join(PROJECT_ROOT_DIR, "tmp/my_model_final.ckpt")
# Make sure the checkpoint directory exists before the first save; depending
# on the TensorFlow version, Saver.save() may refuse to write otherwise.
ckpt_dir = os.path.dirname(final_ckpt_path)
if not os.path.isdir(ckpt_dir):
    os.makedirs(ckpt_dir)

# Full-batch linear regression on the scaled housing data (bias column included)
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            # Report progress and write a checkpoint every 100 epochs
            print("Epoch", epoch, "MSE =", mse.eval())
            save_path = saver.save(sess, periodic_ckpt_path)
        sess.run(training_op)

    best_theta = theta.eval()
    # tmp_path is kept as a kernel global in case another cell still reads it
    tmp_path = final_ckpt_path
    save_path = saver.save(sess, tmp_path)

Epoch 0 MSE = 9.161542
Epoch 100 MSE = 0.71450025
Epoch 200 MSE = 0.56670487
Epoch 300 MSE = 0.5555718
Epoch 400 MSE = 0.54881126
Epoch 500 MSE = 0.5436364
Epoch 600 MSE = 0.5396291
Epoch 700 MSE = 0.5365092
Epoch 800 MSE = 0.53406775
Epoch 900 MSE = 0.53214735


In [7]:
best_theta

array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401652],
       [-0.34770885],
       [ 0.3617837 ],
       [ 0.00393811],
       [-0.04269556],
       [-0.66145295],
       [-0.6375279 ]], dtype=float32)

#### Restoring

In [8]:
# Reload the final checkpoint into a new session. No initializer is run:
# saver.restore() assigns the saved values to the variables of the current graph.
with tf.Session() as sess:
    # Build the path from PROJECT_ROOT_DIR exactly as the saving cell does,
    # so both cells keep pointing at the same file if the root changes.
    saver.restore(sess, os.path.join(PROJECT_ROOT_DIR, "tmp/my_model_final.ckpt"))
    best_theta_restored = theta.eval()

In [9]:
np.allclose(best_theta, best_theta_restored)

True

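Restoring with `saver.restore()` as shown above still requires the graph to have been built by the same Python code. If you instead load the graph structure from the saved `.meta` file with `tf.train.import_meta_graph()`, you can import a pretrained model without needing the corresponding Python code to build the graph. This is very handy when you keep tweaking and saving your model: you can load a previously saved model without having to search for the version of the code that built it.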

### Using TensorBoard

In [10]:
reset_graph()

from datetime import datetime

# Each run logs to its own sub-directory of tf_logs, named after the current
# UTC timestamp, so successive runs do not share a log directory.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

In [11]:
# NOTE: n_epochs is reassigned to 10 in a later cell, before the training loop runs
n_epochs = 1000
learning_rate = 0.01

# Placeholders instead of constants: the data is supplied at run time through
# feed_dict. None leaves the number of rows (the batch size) unspecified.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
# One weight per input column (n features + 1 bias column)
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [12]:
# Scalar summary op for the MSE, plus a writer that stores the default graph
# and every added summary under logdir
mse_summary = tf.summary.scalar('MSE', mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [13]:
n_epochs = 10  # replaces the value of 1000 assigned when the graph was built
batch_size = 100
n_batches = int(np.ceil(m / batch_size))  # batches per epoch, rounded up

In [14]:
def fetch_batch(epoch, batch_index, batch_size):
    """Return a reproducible random mini-batch of the scaled housing data.

    NumPy's global RNG is re-seeded from ``epoch * n_batches + batch_index``,
    so the same (epoch, batch_index) pair always selects the same rows. Row
    indices come from ``np.random.randint`` and may repeat within a batch.

    Returns:
        (X_batch, y_batch): the selected rows of
        ``scaled_housing_data_plus_bias`` and the matching targets reshaped
        to a column vector.
    """
    batch_seed = epoch * n_batches + batch_index
    np.random.seed(batch_seed)
    row_ids = np.random.randint(m, size=batch_size)
    targets = housing.target.reshape(-1, 1)
    return scaled_housing_data_plus_bias[row_ids], targets[row_ids]

In [15]:
# Mini-batch gradient descent; the batch MSE is logged for TensorBoard on
# every 10th batch.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            if batch_index % 10 == 0:
                summary_str = sess.run(mse_summary, feed_dict={X: X_batch, y: y_batch})
                # Step = index of this batch counted across all epochs
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

    best_theta = sess.run(theta)

In [16]:
file_writer.close()

--> To inspect the logs, run `tensorboard --logdir tf_logs/` from a terminal.

### Scopes, Modularity and Sharing Variables (i.e. threshold)
(see book for more examples on modularity)

In [32]:
reset_graph()

def relu(X):
    """Build one thresholded linear unit on top of X and return its output.

    Every call creates its own "weights" and "bias" variables, but all calls
    share one "threshold" variable: it is created on the first call and
    cached as the function attribute ``relu.threshold``.
    """
    with tf.name_scope("relu"):
        try:
            shared_threshold = relu.threshold
        except AttributeError:
            # First call: create the variable that later calls will reuse
            shared_threshold = relu.threshold = tf.Variable(0.0, name="threshold")
        n_in = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_in, 1)), name="weights")
        bias = tf.Variable(0.0, name="bias")
        linear = tf.add(tf.matmul(X, weights), bias, name="z")
        return tf.maximum(linear, shared_threshold, name="max")

In [33]:
# n_features has to be defined here: no earlier cell assigns it, so without
# this line the cell raises a NameError on a fresh kernel. The graph built
# below is only written out for visualisation, so the exact width is arbitrary.
n_features = 3  # NOTE(review): illustrative value - confirm the intended width

X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
# Five units that each own their weights/bias but share relu.threshold
relus = [relu(X) for i in range(5)]
output = tf.add_n(relus, name="output")

In [34]:
file_writer = tf.summary.FileWriter("logs/relu9", tf.get_default_graph())
file_writer.close()

# A DNN on MNIST

## Construction Phase

In [23]:
import tensorflow as tf

n_inputs = 28*28  # MNIST: one input feature per pixel
n_hidden1 = 300   # units in the first hidden layer
n_hidden2 = 100   # units in the second hidden layer
n_outputs = 10    # units in the output (logits) layer

-  X: 
    -  We don't know how many instances a batch contains -> None
    -  We know that every pixel is a feature -> 28 * 28
-  y:
    -  y will have one entry per instance, but we don't know the batch size

In [24]:
reset_graph()

# X: one row per instance (batch size unknown -> None), one column per pixel
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# y: one integer label per instance. The shape must be the 1-tuple (None,):
# a bare (None) is just None, which leaves even the rank of y unspecified.
y = tf.placeholder(tf.int64, shape=(None,), name="y")

#### A function to create a neuron layer

In [25]:
def neuron_layer(X, n_neurons, name, activation=None):
    """Create one fully connected layer inside the name scope ``name``.

    Args:
        X: Input tensor; its second dimension is taken as the number of
            inputs of the layer.
        n_neurons: Number of neurons (output columns) of the layer.
        name: Name scope under which the layer's ops are created.
        activation: Optional callable applied to the weighted sums. When
            None, the weighted sums themselves are returned.

    Returns:
        The layer's output tensor.
    """
    with tf.name_scope(name):
        fan_in = int(X.get_shape()[1])
        # Kernel of shape (fan_in, n_neurons), drawn from a truncated normal
        # whose standard deviation shrinks with the number of inputs.
        kernel_init = tf.truncated_normal((fan_in, n_neurons),
                                          stddev=2 / np.sqrt(fan_in))
        kernel = tf.Variable(kernel_init, name="kernel")
        # One bias per neuron, starting at zero.
        bias = tf.Variable(tf.zeros([n_neurons]), name="bias")
        # Weighted sum of the inputs plus bias, for every neuron in the layer.
        weighted_sum = tf.matmul(X, kernel) + bias
        if activation is None:
            return weighted_sum
        return activation(weighted_sum)

#### Create a DNN with own neuron layer

In [26]:
#with tf.name_scope("dnn"):
    #hidden1 = neuron_layer(X, n_hidden1, name="hidden1",
    #                       activation=tf.nn.relu)
    #hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2",
    #                       activation=tf.nn.relu)
    #logits = neuron_layer(hidden2, n_outputs, name="outputs")

#### Create a DNN with predefined layer from tf

In [27]:
with tf.name_scope("dnn"):
    # Two ReLU hidden layers followed by an output layer
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1",
                           activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2",
                           activation=tf.nn.relu)
    # No activation on the output layer: it produces the raw logits
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

#### Define a cost function using cross entropy
-> returns a 1D tensor with cross entropy for each instance 

In [28]:
with tf.name_scope("loss"):
    # Per-instance cross entropy computed from the integer labels in y and
    # the raw logits
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=logits)
    # Average over all instances to get a single scalar loss
    loss = tf.reduce_mean(xentropy, name="loss")

#### Define a GradientDescentOptimizer

In [29]:
learning_rate = 0.01

with tf.name_scope("train"):
    # Plain gradient descent that minimizes the mean cross-entropy loss
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

#### How to evaluate the model 

In [30]:
with tf.name_scope("eval"):
    # For each instance, check whether the highest logit corresponds to the
    # target class (in_top_k with k=1); this returns a 1D tensor of booleans
    correct = tf.nn.in_top_k(logits, y, 1)
    # Cast the booleans to floats and average them -> overall accuracy
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

#### To init and save

In [31]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

## Execution Phase

The `input_data` helper fetches the MNIST data (already scaled, shuffled and split) and provides a helper function for drawing mini-batches.

In [32]:
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/data/")

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [33]:
n_epochs = 40
batch_size = 50

In [34]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # One pass over the training set, one mini-batch at a time
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # "Train accuracy" is measured on the last mini-batch of the epoch only
        acc_train = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch})
        acc_val = sess.run(accuracy, feed_dict={X: mnist.validation.images,
                                                y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Val accuracy:", acc_val)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Train accuracy: 0.94000006 Val accuracy: 0.89939976
1 Train accuracy: 0.9 Val accuracy: 0.9233997
2 Train accuracy: 0.92 Val accuracy: 0.93019974
3 Train accuracy: 0.92 Val accuracy: 0.93599975
4 Train accuracy: 0.96 Val accuracy: 0.94199973
5 Train accuracy: 0.94 Val accuracy: 0.94739974
6 Train accuracy: 0.91999996 Val accuracy: 0.9507997
7 Train accuracy: 0.96000004 Val accuracy: 0.95639974
8 Train accuracy: 0.96 Val accuracy: 0.9591997
9 Train accuracy: 0.92 Val accuracy: 0.9605997
10 Train accuracy: 0.98 Val accuracy: 0.9627997
11 Train accuracy: 0.98 Val accuracy: 0.96639967
12 Train accuracy: 0.94000006 Val accuracy: 0.96679974
13 Train accuracy: 1.0 Val accuracy: 0.9695996
14 Train accuracy: 0.96 Val accuracy: 0.9657997
15 Train accuracy: 1.0 Val accuracy: 0.9689997
16 Train accuracy: 1.0 Val accuracy: 0.97179973
17 Train accuracy: 1.0 Val accuracy: 0.9709997
18 Train accuracy: 0.98 Val accuracy: 0.9709998
19 Train accuracy: 0.98 Val accuracy: 0.97299975
20 Train accuracy: 0.