In [1]:
import tensorflow as tf
import numpy as np

def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# He init

In [5]:
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")

he_init = tf.variance_scaling_initializer()
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu,
                          kernel_initializer=he_init, name="hidden1")

# Batch Normalization

In [6]:
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")

training = tf.placeholder_with_default(False, shape=(), name="training")

hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1")
bn1 = tf.layers.batch_normalization(hidden1, training=training, momentum=0.9)
bn1_act = tf.nn.elu(bn1)

hidden2 = tf.layers.dense(bn1_act, n_hidden2, name="hidden2")
bn2 = tf.layers.batch_normalization(hidden2, training=training, momentum=0.9)
bn2_act = tf.nn.elu(bn2)

logits_before_bn = tf.layers.dense(bn2_act, n_outputs, name="outputs")
logits = tf.layers.batch_normalization(logits_before_bn, training=training,
                                       momentum=0.9)


# 完整代码

In [11]:
reset_graph()

import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

batch_norm_momentum = 0.9

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name="training")

with tf.name_scope("dnn"):
    he_init = tf.variance_scaling_initializer()
    
    hidden1 = tf.layers.dense(X, n_hidden1, kernel_initializer=he_init, name="hidden1")
    bn1 = tf.layers.batch_normalization(hidden1, training=training, momentum=batch_norm_momentum)
    bn1_act = tf.nn.elu(bn1)
    
    hidden2 = tf.layers.dense(bn1_act, n_hidden2, kernel_initializer=he_init, name="hidden2")
    bn2 = tf.layers.batch_normalization(hidden2, training=training, momentum=batch_norm_momentum)
    bn2_act = tf.nn.elu(bn2)
    
    logits_before_bn = tf.layers.dense(bn2_act, n_outputs, kernel_initializer=he_init, name="outputs")
    logits = tf.layers.batch_normalization(logits_before_bn, training=training, momentum=batch_norm_momentum)
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

mnist = input_data.read_data_sets("../dataset/mnist/")

n_epochs = 40
batch_size = 50

"""
UPDATE_OPS is a collection of ops (operations performed when the graph runs, like multiplication, ReLU, etc.), not variables. 
Specifically, this collection maintains a list of ops which need to run after every training step.
"""
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
"""
训练时，需要更新moving_mean和moving_variance。
默认情况下，更新操作被放入tf.GraphKeys.UPDATE_OPS，
因此需要将它们作为依赖项添加到train_op。此外，
在获取update_ops集合之前，请务必添加batch_normalization操作。
否则，update_ops将为空，并且训练/推断将无法正常工作。
"""

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run([training_op, extra_update_ops], 
                     feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                            y: mnist.test.labels})
        print(epoch, "Train acc:", acc_train, "Test acc:", acc_test)
    
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

Extracting ../dataset/mnist/train-images-idx3-ubyte.gz
Extracting ../dataset/mnist/train-labels-idx1-ubyte.gz
Extracting ../dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting ../dataset/mnist/t10k-labels-idx1-ubyte.gz
0 Train acc: 0.88 Test acc: 0.8984
1 Train acc: 0.9 Test acc: 0.9157
2 Train acc: 0.9 Test acc: 0.9212
3 Train acc: 0.96 Test acc: 0.9254
4 Train acc: 0.94 Test acc: 0.9318
5 Train acc: 0.9 Test acc: 0.9344
6 Train acc: 0.94 Test acc: 0.9392
7 Train acc: 0.94 Test acc: 0.9436
8 Train acc: 0.88 Test acc: 0.9469
9 Train acc: 0.92 Test acc: 0.9506
10 Train acc: 0.96 Test acc: 0.9524
11 Train acc: 0.92 Test acc: 0.9567
12 Train acc: 0.98 Test acc: 0.9587
13 Train acc: 0.92 Test acc: 0.9588
14 Train acc: 1.0 Test acc: 0.9611
15 Train acc: 0.98 Test acc: 0.9624
16 Train acc: 0.98 Test acc: 0.9634
17 Train acc: 0.96 Test acc: 0.965
18 Train acc: 1.0 Test acc: 0.9655
19 Train acc: 0.98 Test acc: 0.9663
20 Train acc: 1.0 Test acc: 0.9663
21 Train acc: 1.0 Test acc: 0.967
22 Train 

In [9]:
reset_graph()

import tensorflow as tf
import numpy as np
from functools import partial
from tensorflow.examples.tutorials.mnist import input_data

"""
partial函数的作用：将所作用的函数作为partial（）函数的第一个参数，
原函数的各个参数依次作为partial（）函数的后续参数，
原函数有关键字参数的一定要带上关键字，没有的话，按原有参数顺序进行补充。
"""
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

batch_norm_momentum = 0.9

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name="training")

with tf.name_scope("dnn"):
    he_init = tf.variance_scaling_initializer()
    
    my_batch_norm_layer = partial(
            tf.layers.batch_normalization,
            training=training,
            momentum=batch_norm_momentum)
    
    my_dense_layer = partial(
            tf.layers.dense,
            kernel_initializer=he_init)
    
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name="hidden2")
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(bn2, n_outputs, name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

mnist = input_data.read_data_sets("../dataset/mnist/")

n_epochs = 40
batch_size = 50

extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run([training_op, extra_update_ops], 
                     feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                            y: mnist.test.labels})
        print(epoch, "Train acc:", acc_train, "Test acc:", acc_test)
    
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ../dataset/mnist/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ../dataset/mnist/train-labels-idx1-ubyte.gz
Extracting ../dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting ../dataset/mnist/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
0 Train acc: 0.88 Test acc: 0.8984
1 Train acc: 0.9 Test acc: 0.9157
2 Train acc: 0.9 Test acc: 0.9212
3 Train acc: 0.96 Test acc: 0.9254
4 Train acc: 0.94 Test acc: 0.9318
5 Train acc: 0.9 Test acc: 0.9344
6 Train acc: 0.94 Test acc: 0.9392
7 Train acc: 0.94 Test acc: 0.9436
8 Train acc: 0.88 Test acc: 0.9469
9 Train acc: 0.92 Test acc

In [12]:
tf.GraphKeys.UPDATE_OPS

'update_ops'

In [13]:
tf.get_collection(tf.GraphKeys.UPDATE_OPS)

[<tf.Operation 'dnn/batch_normalization/cond_2/Merge' type=Merge>,
 <tf.Operation 'dnn/batch_normalization/cond_3/Merge' type=Merge>,
 <tf.Operation 'dnn/batch_normalization_1/cond_2/Merge' type=Merge>,
 <tf.Operation 'dnn/batch_normalization_1/cond_3/Merge' type=Merge>,
 <tf.Operation 'dnn/batch_normalization_2/cond_2/Merge' type=Merge>,
 <tf.Operation 'dnn/batch_normalization_2/cond_3/Merge' type=Merge>]

# Gradient Clipping

In [14]:
reset_graph()

n_inputs = 28 * 28  # MNIST
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 50
n_hidden5 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name="hidden5")
    logits = tf.layers.dense(hidden5, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [15]:
learning_rate = 0.01
threshold = 1.0
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var) for grad, var in grads_and_vars]
training_op = optimizer.apply_gradients(capped_gvs)

In [16]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

mnist = input_data.read_data_sets("../dataset/mnist/")

n_epochs = 40
batch_size = 50

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, 
                     feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                            y: mnist.test.labels})
        print(epoch, "Train acc:", acc_train, "Test acc:", acc_test)
    
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

Extracting ../dataset/mnist/train-images-idx3-ubyte.gz
Extracting ../dataset/mnist/train-labels-idx1-ubyte.gz
Extracting ../dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting ../dataset/mnist/t10k-labels-idx1-ubyte.gz
0 Train acc: 0.86 Test acc: 0.8831
1 Train acc: 0.96 Test acc: 0.9262
2 Train acc: 0.94 Test acc: 0.9445
3 Train acc: 0.96 Test acc: 0.951
4 Train acc: 0.98 Test acc: 0.9562
5 Train acc: 0.94 Test acc: 0.9596
6 Train acc: 1.0 Test acc: 0.9643
7 Train acc: 0.96 Test acc: 0.9647
8 Train acc: 0.96 Test acc: 0.9678
9 Train acc: 1.0 Test acc: 0.9667
10 Train acc: 1.0 Test acc: 0.97
11 Train acc: 0.98 Test acc: 0.9694
12 Train acc: 1.0 Test acc: 0.9735
13 Train acc: 1.0 Test acc: 0.9692
14 Train acc: 1.0 Test acc: 0.9729
15 Train acc: 1.0 Test acc: 0.9738
16 Train acc: 1.0 Test acc: 0.9728
17 Train acc: 1.0 Test acc: 0.9683
18 Train acc: 1.0 Test acc: 0.973
19 Train acc: 1.0 Test acc: 0.9744
20 Train acc: 1.0 Test acc: 0.973
21 Train acc: 1.0 Test acc: 0.9708
22 Train acc: 1.0 

# 重用TensorFlow模型

In [24]:
reset_graph()

n_inputs = 28 * 28  # MNIST
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 50
n_hidden5 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name="hidden5")
    logits = tf.layers.dense(hidden5, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
learning_rate = 0.01
threshold = 1.0
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var) for grad, var in grads_and_vars]
training_op = optimizer.apply_gradients(capped_gvs)    

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

mnist = input_data.read_data_sets("../dataset/mnist/")

n_epochs = 40
batch_size = 50

Extracting ../dataset/mnist/train-images-idx3-ubyte.gz
Extracting ../dataset/mnist/train-labels-idx1-ubyte.gz
Extracting ../dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting ../dataset/mnist/t10k-labels-idx1-ubyte.gz


In [25]:
with tf.Session() as sess:

    saver.restore(sess, "./model/my_model_final.ckpt")
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, 
                     feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                            y: mnist.test.labels})
        print(epoch, "Train acc:", acc_train, "Test acc:", acc_test)
    
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

INFO:tensorflow:Restoring parameters from ./model/my_model_final.ckpt
0 Train acc: 1.0 Test acc: 0.9763
1 Train acc: 1.0 Test acc: 0.9758
2 Train acc: 1.0 Test acc: 0.976
3 Train acc: 1.0 Test acc: 0.9759
4 Train acc: 1.0 Test acc: 0.9758
5 Train acc: 1.0 Test acc: 0.976
6 Train acc: 1.0 Test acc: 0.9756
7 Train acc: 1.0 Test acc: 0.9761
8 Train acc: 1.0 Test acc: 0.976
9 Train acc: 1.0 Test acc: 0.9763
10 Train acc: 1.0 Test acc: 0.9761
11 Train acc: 1.0 Test acc: 0.9761
12 Train acc: 1.0 Test acc: 0.9762
13 Train acc: 1.0 Test acc: 0.9759
14 Train acc: 1.0 Test acc: 0.976
15 Train acc: 1.0 Test acc: 0.9764
16 Train acc: 1.0 Test acc: 0.9761
17 Train acc: 1.0 Test acc: 0.9762
18 Train acc: 1.0 Test acc: 0.9761
19 Train acc: 1.0 Test acc: 0.9764
20 Train acc: 1.0 Test acc: 0.9757
21 Train acc: 1.0 Test acc: 0.976
22 Train acc: 1.0 Test acc: 0.9758
23 Train acc: 1.0 Test acc: 0.9761
24 Train acc: 1.0 Test acc: 0.9763
25 Train acc: 1.0 Test acc: 0.9761
26 Train acc: 1.0 Test acc: 0.976
2

# 重用原有模型的一部分

In [27]:
reset_graph()

n_inputs = 28 * 28  # MNIST
n_hidden1 = 300 # reused
n_hidden2 = 50  # reused
n_hidden3 = 50  # reused
n_hidden4 = 20  # new!
n_outputs = 10  # new!

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")       # reused
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2") # reused
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3") # reused
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4") # new!
    logits = tf.layers.dense(hidden4, n_outputs, name="outputs")                         # new!
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
    
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
init = tf.global_variables_initializer()

In [28]:
reuse_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                               scope="hidden[123]")
restore_saver = tf.train.Saver(reuse_vars)

In [29]:
saver = tf.train.Saver()

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model/my_model_final.ckpt")
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, 
                     feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                            y: mnist.test.labels})
        print(epoch, "Train acc:", acc_train, "Test acc:", acc_test)
    
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

INFO:tensorflow:Restoring parameters from ./model/my_model_final.ckpt
0 Train acc: 0.98 Test acc: 0.967
1 Train acc: 1.0 Test acc: 0.9725
2 Train acc: 1.0 Test acc: 0.9758
3 Train acc: 1.0 Test acc: 0.9765
4 Train acc: 1.0 Test acc: 0.9759
5 Train acc: 1.0 Test acc: 0.9766
6 Train acc: 1.0 Test acc: 0.9774
7 Train acc: 1.0 Test acc: 0.9773
8 Train acc: 1.0 Test acc: 0.9763
9 Train acc: 1.0 Test acc: 0.9772
10 Train acc: 1.0 Test acc: 0.9775
11 Train acc: 1.0 Test acc: 0.9765
12 Train acc: 1.0 Test acc: 0.9774
13 Train acc: 1.0 Test acc: 0.9772
14 Train acc: 1.0 Test acc: 0.9763
15 Train acc: 1.0 Test acc: 0.9782
16 Train acc: 1.0 Test acc: 0.9777
17 Train acc: 1.0 Test acc: 0.977
18 Train acc: 1.0 Test acc: 0.9779
19 Train acc: 1.0 Test acc: 0.9778
20 Train acc: 1.0 Test acc: 0.9771
21 Train acc: 1.0 Test acc: 0.9774
22 Train acc: 1.0 Test acc: 0.9777
23 Train acc: 1.0 Test acc: 0.9776
24 Train acc: 1.0 Test acc: 0.9774
25 Train acc: 1.0 Test acc: 0.9775
26 Train acc: 1.0 Test acc: 0.9

# 重用其他框架的模型

In [30]:
reset_graph()

n_inputs = 2
n_hidden1 = 3

original_w = [[1., 2., 3.], [4., 5., 6.]] # Load the weights from the other framework
original_b = [7., 8., 9.]                 # Load the biases from the other framework

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")

# [...] Build the rest of the model

# Get a handle on the assignment nodes for the hidden1 variables
graph = tf.get_default_graph()
assign_kernel = graph.get_operation_by_name("hidden1/kernel/Assign")
assign_bias = graph.get_operation_by_name("hidden1/bias/Assign")
init_kernel = assign_kernel.inputs[1]
init_bias = assign_bias.inputs[1]

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init, feed_dict={init_kernel: original_w, init_bias: original_b})
    # [...] Train the model on your new task
    print(hidden1.eval(feed_dict={X: [[10.0, 11.0]]}))

[[ 61.  83. 105.]]


# 冻结低层

In [35]:
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 20
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    logits = tf.layers.dense(hidden4, n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

In [36]:
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 
                                   scope="hidden[34]|outputs")
    training_op = optimizer.minimize(loss, var_list=train_vars)

In [37]:
init = tf.global_variables_initializer()

reuse_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                               scope="hidden[123]")
restore_saver = tf.train.Saver(reuse_vars)

saver = tf.train.Saver()

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model/my_model_final.ckpt")
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                            y: mnist.test.labels})
        print(epoch, "Train acc:", acc_train, "Test acc:", acc_test)
    
    save_path = saver.save(sess, "./model/my_model_final.ckpt")   

INFO:tensorflow:Restoring parameters from ./model/my_model_final.ckpt
0 Train acc: 0.98 Test acc: 0.9753
1 Train acc: 1.0 Test acc: 0.976
2 Train acc: 1.0 Test acc: 0.9773
3 Train acc: 1.0 Test acc: 0.9767
4 Train acc: 1.0 Test acc: 0.9764
5 Train acc: 1.0 Test acc: 0.9771
6 Train acc: 1.0 Test acc: 0.9771
7 Train acc: 1.0 Test acc: 0.9771
8 Train acc: 1.0 Test acc: 0.9776
9 Train acc: 1.0 Test acc: 0.9768
10 Train acc: 1.0 Test acc: 0.9768
11 Train acc: 1.0 Test acc: 0.9769
12 Train acc: 1.0 Test acc: 0.9773
13 Train acc: 1.0 Test acc: 0.977
14 Train acc: 1.0 Test acc: 0.9773
15 Train acc: 1.0 Test acc: 0.9772
16 Train acc: 1.0 Test acc: 0.9774
17 Train acc: 1.0 Test acc: 0.9771
18 Train acc: 1.0 Test acc: 0.9767
19 Train acc: 1.0 Test acc: 0.9771
20 Train acc: 1.0 Test acc: 0.977
21 Train acc: 1.0 Test acc: 0.9773
22 Train acc: 1.0 Test acc: 0.9769
23 Train acc: 1.0 Test acc: 0.977
24 Train acc: 1.0 Test acc: 0.9772
25 Train acc: 1.0 Test acc: 0.9771
26 Train acc: 1.0 Test acc: 0.976

# 缓存冻结层

In [2]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [3]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [7]:
reset_graph()

import numpy as np

batch_size = 50
n_epochs = 20

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 20
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    
    hidden2_stop = tf.stop_gradient(hidden2)
    
    hidden3 = tf.layers.dense(hidden2_stop, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    logits = tf.layers.dense(hidden4, n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
    
learning_rate = 0.01
    
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 
                                   scope="hidden[34]|outputs")
    training_op = optimizer.minimize(loss, var_list=train_vars)
    
init = tf.global_variables_initializer()

reuse_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                               scope="hidden[123]")
restore_saver = tf.train.Saver(reuse_vars)

saver = tf.train.Saver()

n_batches = len(X_train) // batch_size

with tf.Session() as sess:
    init.run()
    restore_saver.restore(sess, "./model/my_model_final.ckpt")
    
    h2_cache = sess.run(hidden2, feed_dict={X: X_train})
    h2_cache_valid = sess.run(hidden2, feed_dict={X: X_valid})
    
    for epoch in range(n_epochs):
        shuffled_idx = np.random.permutation(len(X_train))
        hidden2_batches = np.array_split(h2_cache[shuffled_idx], n_batches)
        y_batches = np.array_split(y_train[shuffled_idx], n_batches)
        for hidden2_batch, y_batch in zip(hidden2_batches, y_batches):
            sess.run(training_op, feed_dict={hidden2: hidden2_batch, y: y_batch})
        
        accuracy_val = accuracy.eval(feed_dict={hidden2: h2_cache_valid, y: y_valid})
        print(epoch, "Validation acc:", accuracy_val)
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

INFO:tensorflow:Restoring parameters from ./model/my_model_final.ckpt
0 Validation acc: 0.9774
1 Validation acc: 0.9776
2 Validation acc: 0.9764
3 Validation acc: 0.978
4 Validation acc: 0.9766
5 Validation acc: 0.9774
6 Validation acc: 0.9772
7 Validation acc: 0.977
8 Validation acc: 0.9774
9 Validation acc: 0.9778
10 Validation acc: 0.9778
11 Validation acc: 0.978
12 Validation acc: 0.9778
13 Validation acc: 0.9778
14 Validation acc: 0.978
15 Validation acc: 0.9778
16 Validation acc: 0.9778
17 Validation acc: 0.9778
18 Validation acc: 0.9778
19 Validation acc: 0.9776


# 学习速率调度

In [35]:
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))  

In [36]:
with tf.name_scope("train"):
    initial_learning_rate = 0.1
    decay_steps = 10000
    decay_rate = 1 / 10
    global_step = tf.Variable(0, trainable=False, name="global_step")
    learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                               global_step,
                                               decay_steps,
                                               decay_rate)
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    training_op = optimizer.minimize(loss, global_step=global_step)

In [37]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 30
batch_size = 50

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Training acc:", acc, "Validation acc:", acc_val)
    
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

0 Training acc: 0.98 Validation acc: 0.9648
1 Training acc: 0.96 Validation acc: 0.9748
2 Training acc: 0.98 Validation acc: 0.978
3 Training acc: 1.0 Validation acc: 0.9804
4 Training acc: 0.98 Validation acc: 0.9814
5 Training acc: 1.0 Validation acc: 0.9852
6 Training acc: 1.0 Validation acc: 0.9852
7 Training acc: 1.0 Validation acc: 0.9838
8 Training acc: 1.0 Validation acc: 0.9838
9 Training acc: 1.0 Validation acc: 0.984
10 Training acc: 1.0 Validation acc: 0.9836
11 Training acc: 1.0 Validation acc: 0.9838
12 Training acc: 1.0 Validation acc: 0.9836
13 Training acc: 1.0 Validation acc: 0.9838
14 Training acc: 1.0 Validation acc: 0.9836
15 Training acc: 1.0 Validation acc: 0.9836
16 Training acc: 1.0 Validation acc: 0.9836
17 Training acc: 1.0 Validation acc: 0.9836
18 Training acc: 1.0 Validation acc: 0.9836
19 Training acc: 1.0 Validation acc: 0.9836
20 Training acc: 1.0 Validation acc: 0.9836
21 Training acc: 1.0 Validation acc: 0.9836
22 Training acc: 1.0 Validation acc: 0.9

# 正则化

### 提前停止

### l1和l2正则化

In [38]:
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    logits = tf.layers.dense(hidden1, n_outputs, name="outputs")

In [43]:
tf.get_collection(tf.GraphKeys.VARIABLES)



[<tf.Variable 'hidden1/kernel:0' shape=(784, 300) dtype=float32_ref>,
 <tf.Variable 'hidden1/bias:0' shape=(300,) dtype=float32_ref>,
 <tf.Variable 'outputs/kernel:0' shape=(300, 10) dtype=float32_ref>,
 <tf.Variable 'outputs/bias:0' shape=(10,) dtype=float32_ref>,
 <tf.Variable 'hidden1/kernel/Momentum:0' shape=(784, 300) dtype=float32_ref>,
 <tf.Variable 'hidden1/bias/Momentum:0' shape=(300,) dtype=float32_ref>,
 <tf.Variable 'outputs/kernel/Momentum:0' shape=(300, 10) dtype=float32_ref>,
 <tf.Variable 'outputs/bias/Momentum:0' shape=(10,) dtype=float32_ref>]

In [40]:
W1 = tf.get_default_graph().get_tensor_by_name("hidden1/kernel:0")
W2 = tf.get_default_graph().get_tensor_by_name("outputs/kernel:0")

scale = 0.001

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy, name="avg_xentropy")
    reg_losses = tf.reduce_sum(tf.abs(W1)) + tf.reduce_sum(tf.abs(W2))
    loss = tf.add(base_loss, scale * reg_losses, name="loss")

In [41]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32)) 

learning_rate = 0.01    

with tf.name_scope("train"):
#     initial_learning_rate = 0.1
#     decay_steps = 10000
#     decay_rate = 1 / 10
#     global_step = tf.Variable(0, trainable=False, name="global_step")
#     learning_rate = tf.train.exponential_decay(initial_learning_rate,
#                                                global_step,
#                                                decay_steps,
#                                                decay_rate)
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    training_op = optimizer.minimize(loss) #, global_step=global_step
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 20
batch_size = 200

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Training acc:", acc, "Validation acc:", acc_val)
    
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

0 Training acc: 0.895 Validation acc: 0.9064
1 Training acc: 0.91 Validation acc: 0.9044
2 Training acc: 0.875 Validation acc: 0.9026
3 Training acc: 0.89 Validation acc: 0.907
4 Training acc: 0.935 Validation acc: 0.908
5 Training acc: 0.905 Validation acc: 0.9134
6 Training acc: 0.92 Validation acc: 0.9196
7 Training acc: 0.915 Validation acc: 0.9198
8 Training acc: 0.93 Validation acc: 0.9222
9 Training acc: 0.91 Validation acc: 0.926
10 Training acc: 0.915 Validation acc: 0.9274
11 Training acc: 0.925 Validation acc: 0.9284
12 Training acc: 0.875 Validation acc: 0.9304
13 Training acc: 0.935 Validation acc: 0.9316
14 Training acc: 0.94 Validation acc: 0.9316
15 Training acc: 0.915 Validation acc: 0.9316
16 Training acc: 0.96 Validation acc: 0.9336
17 Training acc: 0.94 Validation acc: 0.9352
18 Training acc: 0.88 Validation acc: 0.9362
19 Training acc: 0.905 Validation acc: 0.9342


### 层数多的情况

In [44]:
from functools import partial

reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

In [45]:
scale = 0.001

my_dense_layer = partial(tf.layers.dense,
                         activation=tf.nn.relu,
                         kernel_regularizer=tf.contrib.layers.l1_regularizer(scale))

with tf.name_scope("dnn"):
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    hidden2 = my_dense_layer(hidden1, n_hidden2, name="hidden2")
    logits = my_dense_layer(hidden2, n_outputs, activation=None, name="outputs")

In [46]:
# W1 = tf.get_default_graph().get_tensor_by_name("hidden1/kernel:0")
# W2 = tf.get_default_graph().get_tensor_by_name("outputs/kernel:0")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy, name="avg_xentropy")
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([base_loss] + reg_losses, name="loss")

In [47]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32)) 

learning_rate = 0.01    

with tf.name_scope("train"):
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    training_op = optimizer.minimize(loss) #, global_step=global_step
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 20
batch_size = 200

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Training acc:", acc, "Validation acc:", acc_val)
    
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

0 Training acc: 0.93 Validation acc: 0.9152
1 Training acc: 0.9 Validation acc: 0.9202
2 Training acc: 0.9 Validation acc: 0.9152
3 Training acc: 0.9 Validation acc: 0.923
4 Training acc: 0.95 Validation acc: 0.9284
5 Training acc: 0.92 Validation acc: 0.9316
6 Training acc: 0.955 Validation acc: 0.9382
7 Training acc: 0.93 Validation acc: 0.9402
8 Training acc: 0.95 Validation acc: 0.9462
9 Training acc: 0.93 Validation acc: 0.9428
10 Training acc: 0.955 Validation acc: 0.9474
11 Training acc: 0.96 Validation acc: 0.9484
12 Training acc: 0.91 Validation acc: 0.9446
13 Training acc: 0.955 Validation acc: 0.9474
14 Training acc: 0.955 Validation acc: 0.9514
15 Training acc: 0.92 Validation acc: 0.9496
16 Training acc: 0.975 Validation acc: 0.9478
17 Training acc: 0.955 Validation acc: 0.9516
18 Training acc: 0.915 Validation acc: 0.951
19 Training acc: 0.93 Validation acc: 0.9478


### Dropout

In [4]:
from functools import partial

reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

In [5]:
training = tf.placeholder_with_default(False, shape=(), name="training")

dropout_rate = 0.5
X_drop = tf.layers.dropout(X, dropout_rate, training=training)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X_drop, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training)
    
    hidden2 = tf.layers.dense(hidden1_drop, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=training)
    logits = tf.layers.dense(hidden2_drop, n_outputs, name="outputs")

In [6]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32)) 

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
#     base_loss = tf.reduce_mean(xentropy, name="avg_xentropy")
#     reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
#     loss = tf.add_n([base_loss] + reg_losses, name="loss")
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01    

with tf.name_scope("train"):
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    training_op = optimizer.minimize(loss) #, global_step=global_step
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 20
batch_size = 200

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
        acc = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Training acc:", acc, "Validation acc:", acc_val)
    
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

0 Training acc: 0.9 Validation acc: 0.9028
1 Training acc: 0.92 Validation acc: 0.9232
2 Training acc: 0.905 Validation acc: 0.9334
3 Training acc: 0.925 Validation acc: 0.943
4 Training acc: 0.95 Validation acc: 0.9438
5 Training acc: 0.92 Validation acc: 0.953
6 Training acc: 0.975 Validation acc: 0.9564
7 Training acc: 0.955 Validation acc: 0.9608
8 Training acc: 0.975 Validation acc: 0.9626
9 Training acc: 0.975 Validation acc: 0.9656
10 Training acc: 0.965 Validation acc: 0.967
11 Training acc: 0.96 Validation acc: 0.9692
12 Training acc: 0.965 Validation acc: 0.97
13 Training acc: 0.965 Validation acc: 0.9718
14 Training acc: 0.98 Validation acc: 0.971
15 Training acc: 0.96 Validation acc: 0.9724
16 Training acc: 0.985 Validation acc: 0.9734
17 Training acc: 0.975 Validation acc: 0.9736
18 Training acc: 0.975 Validation acc: 0.9748
19 Training acc: 0.955 Validation acc: 0.9762


# 最大范数正则化

In [7]:
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

learning_rate = 0.01
momentum = 0.9

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("train"):
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [13]:
tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

[<tf.Variable 'hidden1/kernel:0' shape=(784, 300) dtype=float32_ref>,
 <tf.Variable 'hidden1/bias:0' shape=(300,) dtype=float32_ref>,
 <tf.Variable 'hidden2/kernel:0' shape=(300, 50) dtype=float32_ref>,
 <tf.Variable 'hidden2/bias:0' shape=(50,) dtype=float32_ref>,
 <tf.Variable 'outputs/kernel:0' shape=(50, 10) dtype=float32_ref>,
 <tf.Variable 'outputs/bias:0' shape=(10,) dtype=float32_ref>]

In [9]:
threshold = 1.0
weights1 = tf.get_default_graph().get_tensor_by_name("hidden1/kernel:0")
clipped_weights1 = tf.clip_by_norm(weights1, clip_norm=threshold, axes=1)
clip_weights1 = tf.assign(weights1, clipped_weights1)

weights2 = tf.get_default_graph().get_tensor_by_name("hidden2/kernel:0")
clipped_weights2 = tf.clip_by_norm(weights2, clip_norm=threshold, axes=1)
clip_weights2 = tf.assign(weights2, clipped_weights2)

In [10]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 20
batch_size = 50

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            clip_weights1.eval()
            clip_weights2.eval()
        acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Validation acc:", acc_valid)
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

0 Validation acc: 0.9566
1 Validation acc: 0.9696
2 Validation acc: 0.9712
3 Validation acc: 0.9766
4 Validation acc: 0.977
5 Validation acc: 0.9776
6 Validation acc: 0.9816
7 Validation acc: 0.9812
8 Validation acc: 0.9798
9 Validation acc: 0.9818
10 Validation acc: 0.981
11 Validation acc: 0.9836
12 Validation acc: 0.9822
13 Validation acc: 0.9842
14 Validation acc: 0.9838
15 Validation acc: 0.9838
16 Validation acc: 0.9826
17 Validation acc: 0.984
18 Validation acc: 0.9842
19 Validation acc: 0.984


In [14]:
for variable in tf.global_variables():
    print(variable.name)

hidden1/kernel:0
hidden1/bias:0
hidden2/kernel:0
hidden2/bias:0
outputs/kernel:0
outputs/bias:0
hidden1/kernel/Momentum:0
hidden1/bias/Momentum:0
hidden2/kernel/Momentum:0
hidden2/bias/Momentum:0
outputs/kernel/Momentum:0
outputs/bias/Momentum:0


In [15]:
def max_norm_regularizer(threshold, axes=1, name="max_norm", 
                         collection="max_norm"):
    def max_norm(weights):
        clipped = tf.clip_by_norm(weights, clip_norm=threshold, axes=axes)
        clip_weights = tf.assign(weights, clipped)
        tf.add_to_collection(collection, clip_weights)
        return None
    return max_norm

In [16]:
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

learning_rate = 0.01
momentum = 0.9

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

max_norm_reg = max_norm_regularizer(1.0)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, 
                              activation=tf.nn.relu, 
                              kernel_regularizer=max_norm_reg, 
                              name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, 
                              activation=tf.nn.relu, 
                              kernel_regularizer=max_norm_reg,
                              name="hidden2")
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
with tf.name_scope("train"):
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 20
batch_size = 50

clip_all_weights = tf.get_collection("max_norm")

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            sess.run(clip_all_weights)
        acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Validation acc:", acc_valid)
    save_path = saver.save(sess, "./model/my_model_final.ckpt")

0 Validation acc: 0.9556
1 Validation acc: 0.9706
2 Validation acc: 0.9682
3 Validation acc: 0.9726
4 Validation acc: 0.9766
5 Validation acc: 0.976
6 Validation acc: 0.981
7 Validation acc: 0.9798
8 Validation acc: 0.9838
9 Validation acc: 0.9824
10 Validation acc: 0.9814
11 Validation acc: 0.9832
12 Validation acc: 0.983
13 Validation acc: 0.9832
14 Validation acc: 0.9838
15 Validation acc: 0.9842
16 Validation acc: 0.9834
17 Validation acc: 0.984
18 Validation acc: 0.9838
19 Validation acc: 0.9836


In [17]:
tf.get_collection("max_norm")

[<tf.Tensor 'dnn/hidden1/kernel/Regularizer/Assign:0' shape=(784, 300) dtype=float32_ref>,
 <tf.Tensor 'dnn/hidden2/kernel/Regularizer/Assign:0' shape=(300, 50) dtype=float32_ref>]