In [1]:
import tensorflow as tf

In [2]:
# Show the installed TensorFlow version; the rest of this notebook uses the
# TF 1.x graph/session API (tf.Session, tf.placeholder, tf.train.Saver).
tf.__version__

'1.10.1'

In [3]:
import numpy as np
import os

# To plot pretty figures
%matplotlib inline
import matplotlib.pyplot as plt
# Default font sizes for every figure: axis labels at 14, tick labels at 12
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

In [4]:
def reset_graph(seed=42):
    """Start over with an empty default graph and reproducible randomness.

    Seeds NumPy's global RNG, clears TensorFlow's default graph, and sets
    TensorFlow's graph-level random seed, all with the same ``seed``.

    Args:
        seed: Integer seed shared by NumPy and TensorFlow (default 42).
    """
    np.random.seed(seed)
    # The TF seed must be set after the reset so it applies to the new graph.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

In [5]:
# Load MNIST through Keras; load_data() returns a (train, test) pair of
# (images, labels) tuples.
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [6]:
# Shape of the raw training images before flattening
X_train.shape

(60000, 28, 28)

In [7]:
# Pixel dtype of the raw images (cast to float32 in the preprocessing cell)
X_train.dtype

dtype('uint8')

In [8]:
# Label dtype of the raw data (cast to int32 in the preprocessing cell)
y_train.dtype

dtype('uint8')

In [9]:
# Preprocess the data:
#  - images: flatten each 28x28 picture into a 784-long row, convert to
#    float32 and divide by 255
#  - labels: convert to int32, matching the dtype of the `y` placeholder
X_train = X_train.reshape(-1, 28*28).astype(np.float32) / 255.
X_test = X_test.reshape(-1, 28*28).astype(np.float32) / 255.
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

In [10]:
# Split the training data: the first 5000 samples become the validation set,
# the remainder stays as the training set.
X_val, y_val = X_train[:5000], y_train[:5000]
X_train, y_train = X_train[5000:], y_train[5000:]

In [20]:
def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches that together cover the data exactly once.

    The sample indices are permuted with NumPy's global RNG and split with
    ``np.array_split`` into ``len(X) // batch_size`` nearly equal chunks, so
    each batch holds ``batch_size`` samples or slightly more.

    Args:
        X: Indexable array of inputs (first axis = samples).
        y: Indexable array of targets aligned with ``X``.
        batch_size: Desired number of samples per batch; must be >= 1.

    Yields:
        ``(X_batch, y_batch)`` tuples. If there are fewer than ``batch_size``
        samples, a single batch with all of them is yielded. Nothing is
        yielded for empty input.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1, got %r" % (batch_size,))
    n_samples = len(X)
    if n_samples == 0:
        return
    rnd_idx = np.random.permutation(n_samples)
    # At least one batch: np.array_split rejects 0 sections, which is what
    # n_samples // batch_size gives when n_samples < batch_size.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

### Reuse a TF model

**First, you need to get the graph's structure.**

The **`tf.train.import_meta_graph()` function** loads the graph's operations into the default graph and returns a `Saver` that you can then use to restore the model's state.

**By default, a `Saver` saves the structure of the graph into a `.meta` file**, so that's the file you should load:

In [12]:
# Load the saved graph structure into the default graph. The returned Saver
# is used later to restore the trained weights from the checkpoint.
saver = tf.train.import_meta_graph('./my_model_final.ckpt.meta')

Next, you need to get a handle on all the operations you will need for training.

In [13]:
# Print the name of every operation in the imported graph, one per line,
# to find the ones needed for training and evaluation.
default_graph = tf.get_default_graph()
for op in default_graph.get_operations():
    print(op.name)

X
y
hidden1/kernel/Initializer/random_uniform/shape
hidden1/kernel/Initializer/random_uniform/min
hidden1/kernel/Initializer/random_uniform/max
hidden1/kernel/Initializer/random_uniform/RandomUniform
hidden1/kernel/Initializer/random_uniform/sub
hidden1/kernel/Initializer/random_uniform/mul
hidden1/kernel/Initializer/random_uniform
hidden1/kernel
hidden1/kernel/Assign
hidden1/kernel/read
hidden1/bias/Initializer/zeros
hidden1/bias
hidden1/bias/Assign
hidden1/bias/read
dnn/hidden1/MatMul
dnn/hidden1/BiasAdd
dnn/hidden1/Relu
hidden2/kernel/Initializer/random_uniform/shape
hidden2/kernel/Initializer/random_uniform/min
hidden2/kernel/Initializer/random_uniform/max
hidden2/kernel/Initializer/random_uniform/RandomUniform
hidden2/kernel/Initializer/random_uniform/sub
hidden2/kernel/Initializer/random_uniform/mul
hidden2/kernel/Initializer/random_uniform
hidden2/kernel
hidden2/kernel/Assign
hidden2/kernel/read
hidden2/bias/Initializer/zeros
hidden2/bias
hidden2/bias/Assign
hidden2/bias/read
dn

Once you know which operations you need, you can get a handle on them using the graph's `get_operation_by_name()` or `get_tensor_by_name()` methods:

In [14]:
# Look the training handles up by name in the imported graph.
# Tensor names carry an output index (":0"); operation names do not.
default_graph = tf.get_default_graph()

X = default_graph.get_tensor_by_name("X:0")
y = default_graph.get_tensor_by_name("y:0")
accuracy = default_graph.get_tensor_by_name("eval/acc:0")
training_op = default_graph.get_operation_by_name("GradientDescent")

Create a collection containing all the important ops that people will want to get a handle on:

In [15]:
# Register the handles under a single collection key so that anyone reusing
# the graph can fetch them with one tf.get_collection() call.
# Clear the key first: tf.add_to_collection() always appends, so re-running
# this cell would otherwise duplicate the entries and break code that unpacks
# exactly four values from the collection.
tf.get_default_graph().clear_collection('my_important_ops')
for op in (X, y, accuracy, training_op):
    tf.add_to_collection('my_important_ops', op)

In [16]:
# Display what was stored under the key (two placeholders, the accuracy tensor, the training op)
tf.get_collection('my_important_ops')

[<tf.Tensor 'X:0' shape=(?, 784) dtype=float32>,
 <tf.Tensor 'y:0' shape=<unknown> dtype=int32>,
 <tf.Tensor 'eval/acc:0' shape=() dtype=float32>,
 <tf.Operation 'GradientDescent' type=NoOp>]

This way people who reuse the model will be able to simply write:

In [17]:
# The collection returns its items in the order they were added, so the
# handles unpack as: inputs, labels, accuracy tensor, training op.
X, y, acc, training_op = tf.get_collection('my_important_ops')

In [18]:
# Training schedule shared by all training cells below:
# number of passes over the training set, and samples per mini-batch.
n_epochs, batch_size = 20, 128

**（重用模型）** Start a session, restore the model's state, and continue training on your data:

In [22]:
# Reuse the `saver` returned by the earlier tf.train.import_meta_graph() call.
# Importing the .meta file again here would add a second copy of the model's
# ops to the default graph, while X, y, acc and training_op would still refer
# to the first copy.
with tf.Session() as sess:
    # Load the pretrained weights instead of running an initializer
    saver.restore(sess, './my_model_final.ckpt')

    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Validation accuracy after each epoch
        acc_val = acc.eval(feed_dict={X: X_val, y: y_val})
        print(epoch, 'val acc:', acc_val)

    # Store the fine-tuned weights under a new checkpoint name
    save_path = saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 val acc: 0.965
1 val acc: 0.965
2 val acc: 0.966
3 val acc: 0.9666
4 val acc: 0.9676
5 val acc: 0.9658
6 val acc: 0.9672
7 val acc: 0.9684
8 val acc: 0.9706
9 val acc: 0.969
10 val acc: 0.9694
11 val acc: 0.9708
12 val acc: 0.9702
13 val acc: 0.9708
14 val acc: 0.9708
15 val acc: 0.9734
16 val acc: 0.9716
17 val acc: 0.9734
18 val acc: 0.9728
19 val acc: 0.9752


或者，如果能获得构建原始模型的图结构代码，可以这样（不使用`tf.train.import_meta_graph()`）：

In [24]:
reset_graph()

# Layer sizes of the network rebuilt here; the checkpoint restored in the
# training cell is loaded into these variables, so the architecture has to
# match the saved model.
n_inputs = 28 * 28  # MNIST
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 50
n_hidden5 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# (None,) is a real 1-D shape. The former `shape=(None)` is just `None`
# (parentheses without a comma do not make a tuple), which left y's rank unknown.
y = tf.placeholder(tf.int32, shape=(None,), name="y")

# Five ReLU hidden layers followed by a linear output layer producing logits
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name="hidden5")
    logits = tf.layers.dense(hidden5, n_outputs, name="outputs")

# Mean cross-entropy between the integer labels and the logits
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

# Accuracy: fraction of samples whose top-1 prediction equals the label
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

learning_rate = 0.01
threshold = 1.0

# Gradient descent with gradient clipping: every gradient value is clipped
# to [-threshold, threshold] before it is applied.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)
              for grad, var in grads_and_vars]
training_op = optimizer.apply_gradients(capped_gvs)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [26]:
# Continue training the rebuilt network, starting from the saved weights
with tf.Session() as sess:
    # Restoring the checkpoint takes the place of variable initialization
    saver.restore(sess, "./my_model_final.ckpt")

    val_feed = {X: X_val, y: y_val}
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            train_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=train_feed)
        # Evaluate on the validation set once per epoch
        accuracy_val = sess.run(accuracy, feed_dict=val_feed)
        print(epoch, "Validation accuracy:", accuracy_val)

    save_path = saver.save(sess, "./my_new_model_final.ckpt")

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.9652
1 Validation accuracy: 0.9642
2 Validation accuracy: 0.9648
3 Validation accuracy: 0.9628
4 Validation accuracy: 0.9622
5 Validation accuracy: 0.967
6 Validation accuracy: 0.9704
7 Validation accuracy: 0.9696
8 Validation accuracy: 0.9692
9 Validation accuracy: 0.971
10 Validation accuracy: 0.9718
11 Validation accuracy: 0.9652
12 Validation accuracy: 0.971
13 Validation accuracy: 0.9724
14 Validation accuracy: 0.9704
15 Validation accuracy: 0.9728
16 Validation accuracy: 0.9712
17 Validation accuracy: 0.9736
18 Validation accuracy: 0.9698
19 Validation accuracy: 0.9736


通常，你只需要重用比较底层的层。但是`tf.train.import_meta_graph()`会加载整个模型。

现在，基于已训练好的前3个隐藏层，添加一个新的第4个隐藏层。同样需要构建一个新的输出层、一个新的损失，以及一个新的优化器来最小化该损失。

In [28]:
reset_graph()

n_hidden4 = 20  # new layer
n_outputs = 10  # new layer

# Import the complete pretrained graph; `saver` only knows the variables
# that exist at this point, i.e. those of the original model.
saver = tf.train.import_meta_graph('./my_model_final.ckpt.meta')

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")

# Output of the third pretrained hidden layer: the point where the new
# layers are attached.
hidden3 = tf.get_default_graph().get_tensor_by_name('dnn/hidden3/Relu:0')

new_hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name='new_hidden4')
new_logits = tf.layers.dense(new_hidden4, n_outputs, name='new_outputs')

with tf.name_scope('new_loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=new_logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('new_eval'):
    correct = tf.nn.in_top_k(new_logits, y, 1)
    # Name the op after what it computes (it was mislabelled 'loss').
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')

learning_rate = 0.01
with tf.name_scope('new_train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
# Created after the new layers, so this Saver covers the whole new model
new_saver = tf.train.Saver()

In [29]:
# Train the new model
with tf.Session() as sess:
    # Initialize every variable first, then overwrite the ones stored in the
    # pretrained checkpoint.
    sess.run(init)
    saver.restore(sess, './my_model_final.ckpt')

    val_feed = {X: X_val, y: y_val}
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            train_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=train_feed)
        acc_val = sess.run(accuracy, feed_dict=val_feed)
        print(epoch, 'val acc:', acc_val)

    # new_saver writes the full new model
    save_path = new_saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 val acc: 0.9394
1 val acc: 0.952
2 val acc: 0.9586
3 val acc: 0.9588
4 val acc: 0.961
5 val acc: 0.9598
6 val acc: 0.9634
7 val acc: 0.9674
8 val acc: 0.967
9 val acc: 0.9666
10 val acc: 0.9676
11 val acc: 0.9688
12 val acc: 0.9658
13 val acc: 0.9706
14 val acc: 0.969
15 val acc: 0.9706
16 val acc: 0.972
17 val acc: 0.97
18 val acc: 0.9722
19 val acc: 0.9698


If you have access to the Python code that built the original graph, you can just reuse the parts you need and drop the rest:

In [30]:
reset_graph()

n_inputs = 28 * 28  # MNIST
n_hidden1 = 300 # reused
n_hidden2 = 50  # reused
n_hidden3 = 50  # reused
n_hidden4 = 20  # new!
n_outputs = 10  # new!

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# (None,) is a real 1-D shape. The former `shape=(None)` is just `None`
# (parentheses without a comma do not make a tuple), which left y's rank unknown.
y = tf.placeholder(tf.int32, shape=(None,), name="y")

# The layers marked "reused" keep their original names; the restore step
# selects their variables by name.
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(
        X, n_hidden1, activation=tf.nn.relu, name="hidden1")       # reused
    hidden2 = tf.layers.dense(
        hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2") # reused
    hidden3 = tf.layers.dense(
        hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3") # reused
    hidden4 = tf.layers.dense(
        hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4") # new
    logits = tf.layers.dense(hidden4, n_outputs, name="outputs")   # new

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

# Defined here so this cell does not depend on a value left over from an
# earlier cell; same value as used for the other models in this notebook.
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

However, you must create one `Saver` to restore the pretrained model (restricted to the variables of the reused layers) and another `Saver` to save the new model.

In [33]:
# Saver #1: restricted to the variables whose names match "hidden[123]"
# (hidden layers 1-3); used only to load the pretrained weights.
reuse_vars = tf.get_collection(
    tf.GraphKeys.GLOBAL_VARIABLES, scope="hidden[123]")
restore_saver = tf.train.Saver(reuse_vars)

# Saver #2: covers all variables; used to save the new model.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
    # Initialize everything, then overwrite the reused layers from the checkpoint
    sess.run(init)
    restore_saver.restore(sess, './my_model_final.ckpt')

    val_feed = {X: X_val, y: y_val}
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            train_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=train_feed)
        acc_val = sess.run(accuracy, feed_dict=val_feed)
        print(epoch, 'val acc:', acc_val)

    save_path = saver.save(sess, "./my_new_model_final.ckpt")

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 val acc: 0.9304
1 val acc: 0.947
2 val acc: 0.954
3 val acc: 0.9568
4 val acc: 0.9596
5 val acc: 0.9592
6 val acc: 0.9628
7 val acc: 0.9656
8 val acc: 0.9656
9 val acc: 0.9656
10 val acc: 0.9682
11 val acc: 0.9678
12 val acc: 0.9662
13 val acc: 0.9686
14 val acc: 0.9692
15 val acc: 0.972
16 val acc: 0.9712
17 val acc: 0.971
18 val acc: 0.9716
19 val acc: 0.9708
