In [3]:
# Setup
# Common imports
import numpy as np
import os
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Discard the current default graph and reseed TensorFlow and NumPy.

    Args:
        seed: value passed to both ``np.random.seed`` and ``tf.set_random_seed``.
    """
    # NumPy's generator is independent of the TensorFlow graph.
    np.random.seed(seed)
    # Reset first, then seed: the TensorFlow seed is set after the new
    # default graph is in place.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Font sizes for axis labels and tick labels on every figure.
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "deep"

def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG.

    The file is written to ``<PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png``.

    Args:
        fig_id: base file name (without extension) of the figure.
        tight_layout: when true, ``plt.tight_layout()`` is called before saving.
    """
    out_dir = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

  return f(*args, **kwds)


In [9]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST, using /tmp/data/ as the local data directory (the cell output
# shows the archives being downloaded there and extracted).
mnist = input_data.read_data_sets("/tmp/data/")

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [4]:
reset_graph()

# Layer widths: flattened 28x28 input, two hidden layers, 10 output classes.
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')

# Evaluates to False unless a value is fed, so batch norm only runs in
# training mode when the feed_dict sets it to True.
training = tf.placeholder_with_default(False, shape=(), name='training')

# Each hidden layer is dense -> batch norm -> ELU; the dense layers are
# created without an activation so that it can be applied after batch norm.
hidden1 = tf.layers.dense(X, n_hidden1, name='hidden1')
bn1 = tf.layers.batch_normalization(hidden1, training=training, momentum=0.9)
bn1_act = tf.nn.elu(bn1)

hidden2 = tf.layers.dense(bn1_act, n_hidden2, name='hidden2')
bn2 = tf.layers.batch_normalization(hidden2, training=training, momentum=0.9)
bn2_act = tf.nn.elu(bn2)

# The output layer is batch-normalized as well, with no activation afterwards.
logits_before_bn = tf.layers.dense(bn2_act, n_outputs, name='outputs')
logits = tf.layers.batch_normalization(logits_before_bn, training=training, momentum=0.9)

In [10]:
from functools import partial

# Same architecture as the previous cell, rebuilt with functools.partial so the
# shared keyword arguments are written once, plus loss / train / eval ops.
# NOTE(review): n_inputs, n_hidden1, n_hidden2 and n_outputs are not defined in
# this cell; they are taken from the earlier graph-construction cell.
batch_norm_momentum = 0.9
learning_rate = 0.01

reset_graph()
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# (None) is plain None (no tuple), i.e. the shape of y is left unspecified.
y = tf.placeholder(tf.int64, shape=(None), name='y')
# Defaults to False; the training loop feeds True.
training = tf.placeholder_with_default(False, shape=(), name='training')

with tf.name_scope('dnn'):
    he_init = tf.contrib.layers.variance_scaling_initializer()
    
    # Batch-norm layer with the training flag and momentum pre-bound.
    my_batch_norm_layer = partial(
        tf.layers.batch_normalization,
        training=training,
        momentum=batch_norm_momentum
    )
    
    # Dense layer with the kernel initializer pre-bound.
    my_dense_layer = partial(
        tf.layers.dense,
        kernel_initializer=he_init
    )
    # dense -> batch norm -> ELU, twice; bn1 / bn2 hold the activated outputs.
    hidden1 = my_dense_layer(X, n_hidden1, name='hidden1')
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name='hidden2')
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(bn2, n_outputs, name='outputs')
    logits = my_batch_norm_layer(logits_before_bn)
    
with tf.name_scope('loss'):
    # Mean cross-entropy between integer labels y and the logits.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope('eval'):
    # A prediction counts as correct when the label is the top-1 logit.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [11]:
# Training schedule used by the loop in the next cell: number of passes over
# the training set and number of examples per gradient step.
n_epochs = 20
batch_size = 200

In [12]:
# Ops collected under UPDATE_OPS are fetched explicitly on every training
# step, alongside training_op.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        n_batches = mnist.train.num_examples // batch_size
        for iteration in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            # training=True switches batch norm to batch statistics.
            train_feed = {training: True, X: X_batch, y: y_batch}
            sess.run([training_op, extra_update_ops], feed_dict=train_feed)
        # `training` is not fed here, so it falls back to its default (False).
        test_feed = {X: mnist.test.images, y: mnist.test.labels}
        accuracy_val = sess.run(accuracy, feed_dict=test_feed)
        print(epoch, 'Test accuracy:', accuracy_val)
    save_path = saver.save(sess, './my_model_final.ckpt')

0 Test accuracy: 0.8678
1 Test accuracy: 0.8941
2 Test accuracy: 0.9116
3 Test accuracy: 0.9222
4 Test accuracy: 0.9292
5 Test accuracy: 0.9336
6 Test accuracy: 0.94
7 Test accuracy: 0.9427
8 Test accuracy: 0.945
9 Test accuracy: 0.9479
10 Test accuracy: 0.9516
11 Test accuracy: 0.9538
12 Test accuracy: 0.9571
13 Test accuracy: 0.9565
14 Test accuracy: 0.959
15 Test accuracy: 0.961
16 Test accuracy: 0.9606
17 Test accuracy: 0.9623
18 Test accuracy: 0.9639
19 Test accuracy: 0.9637


In [13]:
# Names of the trainable variables; per the output below these are the dense
# kernels/biases and the batch-norm gamma/beta.
[v.name for v in tf.trainable_variables()]

['hidden1/kernel:0',
 'hidden1/bias:0',
 'batch_normalization/gamma:0',
 'batch_normalization/beta:0',
 'hidden2/kernel:0',
 'hidden2/bias:0',
 'batch_normalization_1/gamma:0',
 'batch_normalization_1/beta:0',
 'outputs/kernel:0',
 'outputs/bias:0',
 'batch_normalization_2/gamma:0',
 'batch_normalization_2/beta:0']

In [14]:
# Names of all global variables; compared with the trainable list, the output
# additionally contains each batch-norm layer's moving_mean / moving_variance.
[v.name for v in tf.global_variables()]

['hidden1/kernel:0',
 'hidden1/bias:0',
 'batch_normalization/gamma:0',
 'batch_normalization/beta:0',
 'batch_normalization/moving_mean:0',
 'batch_normalization/moving_variance:0',
 'hidden2/kernel:0',
 'hidden2/bias:0',
 'batch_normalization_1/gamma:0',
 'batch_normalization_1/beta:0',
 'batch_normalization_1/moving_mean:0',
 'batch_normalization_1/moving_variance:0',
 'outputs/kernel:0',
 'outputs/bias:0',
 'batch_normalization_2/gamma:0',
 'batch_normalization_2/beta:0',
 'batch_normalization_2/moving_mean:0',
 'batch_normalization_2/moving_variance:0']

In [15]:

# Plain five-hidden-layer ReLU network (no batch norm), used below to
# demonstrate gradient clipping.
reset_graph()

n_inputs = 28 * 28  # MNIST
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 50
n_hidden5 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
# (None) is plain None, i.e. the shape of y is left unspecified.
y = tf.placeholder(tf.int64, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name="hidden5")
    # Output layer has no activation: it produces raw logits.
    logits = tf.layers.dense(hidden5, n_outputs, name="outputs")

with tf.name_scope("loss"):
    # Mean cross-entropy between integer labels y and the logits.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [16]:
# Step size handed to the GradientDescentOptimizer in the gradient-clipping cell.
learning_rate = 0.01

In [17]:
# Gradient clipping: every gradient component is clipped to
# [-threshold, threshold] before the optimizer applies it.
threshold = 1.0

optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
# compute_gradients may pair a variable with None when the loss does not depend
# on it; clip_by_value cannot take None, so those pairs are left out.
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var)
              for grad, var in grads_and_vars if grad is not None]
training_op = optimizer.apply_gradients(capped_gvs)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [18]:
with tf.name_scope("eval"):
    # A prediction counts as correct when the label is the top-1 logit;
    # accuracy is the mean of those booleans cast to float.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

In [19]:
# Initializer and checkpoint saver used by the training loop below.
# NOTE(review): the gradient-clipping cell also assigns init and saver; these
# assignments replace them.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [20]:
# Training schedule for the gradient-clipping run: passes over the training
# set and examples per gradient step.
n_epochs = 20
batch_size = 200

In [21]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        n_batches = mnist.train.num_examples // batch_size
        for iteration in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            train_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=train_feed)
        # Report accuracy on the full test set once per epoch.
        test_feed = {X: mnist.test.images, y: mnist.test.labels}
        accuracy_val = sess.run(accuracy, feed_dict=test_feed)
        print(epoch, "Test accuracy:", accuracy_val)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Test accuracy: 0.3107
1 Test accuracy: 0.7969
2 Test accuracy: 0.8811
3 Test accuracy: 0.9034
4 Test accuracy: 0.914
5 Test accuracy: 0.9205
6 Test accuracy: 0.9255
7 Test accuracy: 0.9299
8 Test accuracy: 0.9333
9 Test accuracy: 0.9395
10 Test accuracy: 0.9424
11 Test accuracy: 0.9464
12 Test accuracy: 0.9463
13 Test accuracy: 0.949
14 Test accuracy: 0.9527
15 Test accuracy: 0.9525
16 Test accuracy: 0.9552
17 Test accuracy: 0.957
18 Test accuracy: 0.9587
19 Test accuracy: 0.9605


In [22]:
# Exercise 8.1

he_init = tf.contrib.layers.variance_scaling_initializer()

def dnn(inputs, n_hidden_layers=5, n_neurons=100, name=None,
        activation=tf.nn.elu, initializer=he_init):
    """Stack ``n_hidden_layers`` identical dense layers on top of ``inputs``.

    Args:
        inputs: tensor fed to the first hidden layer.
        n_hidden_layers: how many dense layers to create.
        n_neurons: units in each layer.
        name: variable scope name; the scope defaults to ``'dnn'`` when None.
        activation: activation passed to every dense layer.
        initializer: kernel initializer passed to every dense layer.

    Returns:
        The output tensor of the last hidden layer, or ``inputs`` unchanged
        when ``n_hidden_layers`` is 0.
    """
    with tf.variable_scope(name, 'dnn'):
        outputs = inputs
        # Layers are named hidden1, hidden2, ... in creation order.
        for layer_number in range(1, n_hidden_layers + 1):
            outputs = tf.layers.dense(outputs, n_neurons,
                                      activation=activation,
                                      kernel_initializer=initializer,
                                      name='hidden{}'.format(layer_number))
    return outputs

In [24]:
n_inputs = 28 * 28
# Five output classes: the cells below train this model on the examples whose
# label is less than 5.
n_outputs = 5

reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# (None) is plain None, i.e. the shape of y is left unspecified.
y = tf.placeholder(tf.int64, shape=(None), name='y')

# Hidden stack built with dnn()'s default arguments.
dnn_outputs = dnn(X)

# Logits layer plus a softmax op that exposes class probabilities.
logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name='logits')
Y_proba = tf.nn.softmax(logits, name='Y_proba')

In [34]:
# Loss, optimizer, accuracy, initializer and saver for the 0-to-4 model.
learning_rate = 0.01
# NOTE(review): he_init is re-created here but is not used by any statement
# in this cell.
he_init = tf.contrib.layers.variance_scaling_initializer()

# Mean cross-entropy between integer labels y and the logits.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")

optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss, name="training_op")

# A prediction counts as correct when the label is the top-1 logit.
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [26]:
from tensorflow.examples.tutorials.mnist import input_data
# NOTE(review): same call as the earlier data-loading cell; the output below
# shows only extraction from /tmp/data/ this time, no download.
mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [35]:
# Keep only the examples whose label is below 5, in each of the three splits.
train_mask = mnist.train.labels < 5
valid_mask = mnist.validation.labels < 5
test_mask = mnist.test.labels < 5

X_train1, y_train1 = mnist.train.images[train_mask], mnist.train.labels[train_mask]
X_valid1, y_valid1 = mnist.validation.images[valid_mask], mnist.validation.labels[valid_mask]
X_test1, y_test1 = mnist.test.images[test_mask], mnist.test.labels[test_mask]

In [36]:
n_epochs = 1000
batch_size = 20

# Early stopping: training ends once the validation loss has failed to improve
# for more than max_checks_without_progress consecutive epochs.
max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.inf  # np.infty was an alias of np.inf and was removed in NumPy 2.0

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        # Reshuffle every epoch, then cut the indices into len // batch_size groups.
        rnd_idx = np.random.permutation(len(X_train1))
        for rnd_indices in np.array_split(rnd_idx, len(X_train1) // batch_size):
            X_batch, y_batch = X_train1[rnd_indices], y_train1[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid1, y: y_valid1})
        if loss_val < best_loss:
            # New best validation loss: checkpoint it so it can be restored below.
            save_path = saver.save(sess, "./my_mnist_model_0_to_4.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

# Evaluate the best checkpoint (not the final weights) on the test set.
with tf.Session() as sess:
    saver.restore(sess, "./my_mnist_model_0_to_4.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_test1, y: y_test1})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

0	Validation loss: 0.095175	Best loss: 0.095175	Accuracy: 98.12%
1	Validation loss: 1.652569	Best loss: 0.095175	Accuracy: 18.73%
2	Validation loss: 1.667905	Best loss: 0.095175	Accuracy: 18.73%
3	Validation loss: 1.643468	Best loss: 0.095175	Accuracy: 19.08%
4	Validation loss: 1.724565	Best loss: 0.095175	Accuracy: 19.27%
5	Validation loss: 1.654243	Best loss: 0.095175	Accuracy: 18.73%
6	Validation loss: 1.828444	Best loss: 0.095175	Accuracy: 19.08%
7	Validation loss: 1.741695	Best loss: 0.095175	Accuracy: 18.73%
8	Validation loss: 1.670783	Best loss: 0.095175	Accuracy: 19.27%
9	Validation loss: 1.756941	Best loss: 0.095175	Accuracy: 19.08%
10	Validation loss: 1.799783	Best loss: 0.095175	Accuracy: 19.08%
11	Validation loss: 1.822926	Best loss: 0.095175	Accuracy: 18.73%
12	Validation loss: 1.759844	Best loss: 0.095175	Accuracy: 22.01%
13	Validation loss: 1.722078	Best loss: 0.095175	Accuracy: 20.91%
14	Validation loss: 1.665473	Best loss: 0.095175	Accuracy: 22.01%
15	Validation loss: 

INFO:tensorflow:Restoring parameters from ./my_mnist_model_0_to_4.ckpt


Final test accuracy: 98.38%


In [48]:
# Finding Hyperparameters
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError

class DNNClassifier(BaseEstimator, ClassifierMixin):
    """Scikit-learn style classifier wrapping a TensorFlow dense network.

    The network is ``n_hidden_layers`` dense layers of ``n_neurons`` units,
    each optionally preceded by dropout and followed by batch normalization,
    topped by a softmax output layer. ``fit`` builds a private graph and
    session, trains on mini-batches and, when a validation set is supplied,
    applies early stopping and restores the best parameters seen.

    Args:
        n_hidden_layers: number of hidden dense layers.
        n_neurons: units per hidden layer.
        optimizer_class: callable accepting ``learning_rate=`` and returning
            an optimizer with a ``minimize`` method.
        learning_rate: learning rate handed to ``optimizer_class``.
        batch_size: target mini-batch size.
        activation: callable ``activation(tensor, name=...)`` applied after
            each hidden layer.
        initializer: kernel initializer for all dense layers.
        batch_norm_momentum: if truthy, batch normalization with this momentum
            is added after every hidden dense layer.
        dropout_rate: if truthy, dropout with this rate is applied to the
            input of every hidden dense layer.
        random_state: if not None, seed given to TensorFlow and NumPy when the
            graph is built.
    """

    def __init__(self, n_hidden_layers=5, n_neurons=100, optimizer_class=tf.train.AdamOptimizer,
                 learning_rate=0.01, batch_size=20, activation=tf.nn.elu, initializer=he_init,
                 batch_norm_momentum=None, dropout_rate=None, random_state=None):
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons = n_neurons
        self.optimizer_class = optimizer_class
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.initializer = initializer
        self.batch_norm_momentum = batch_norm_momentum
        self.dropout_rate = dropout_rate
        self.random_state = random_state
        # No session exists until fit() has been called.
        self._session = None

    def _dnn(self, inputs):
        """Build the hidden layers on top of ``inputs`` and return the last output."""
        for layer in range(self.n_hidden_layers):
            if self.dropout_rate:
                inputs = tf.layers.dropout(inputs, self.dropout_rate, training=self._training)
            inputs = tf.layers.dense(inputs, self.n_neurons,
                                     kernel_initializer=self.initializer,
                                     name='hidden%d' % (layer + 1))
            if self.batch_norm_momentum:
                inputs = tf.layers.batch_normalization(
                    inputs,
                    momentum=self.batch_norm_momentum,
                    training=self._training
                )
            # Distinct op name so the activation does not reuse the dense layer's name.
            inputs = self.activation(inputs, name='hidden%d_out' % (layer + 1))
        return inputs

    def _build_graph(self, n_inputs, n_outputs):
        """Create placeholders, network, loss, optimizer, metrics, init op and saver.

        Must be called with the target graph installed as the default graph.
        The resulting tensors and ops are stored on ``self`` as private attributes.
        """
        if self.random_state is not None:
            tf.set_random_seed(self.random_state)
            np.random.seed(self.random_state)

        X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
        y = tf.placeholder(tf.int32, shape=(None), name='y')

        # The training flag is only needed when batch norm or dropout is active.
        if self.batch_norm_momentum or self.dropout_rate:
            self._training = tf.placeholder_with_default(False, shape=(), name='training')
        else:
            self._training = None

        dnn_outputs = self._dnn(X)

        # Use the configured initializer here as well, like the hidden layers.
        logits = tf.layers.dense(
            dnn_outputs,
            n_outputs,
            kernel_initializer=self.initializer,
            name='logits'
        )
        Y_proba = tf.nn.softmax(logits, name='Y_proba')

        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
        loss = tf.reduce_mean(xentropy, name='loss')

        optimizer = self.optimizer_class(learning_rate=self.learning_rate)
        training_op = optimizer.minimize(loss)

        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        self._X, self._y = X, y
        self._Y_proba, self._loss = Y_proba, loss
        self._training_op, self._accuracy = training_op, accuracy
        self._init, self._saver = init, saver

    def close_session(self):
        """Close the current TensorFlow session, if there is one."""
        if self._session:
            self._session.close()

    def _get_model_params(self):
        """Return ``{variable op name: current value}`` for every global variable."""
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}

    def _restore_model_params(self, model_params):
        """Write the values in ``model_params`` back into the graph's variables.

        Each variable's existing ``<name>/Assign`` op is run, with its value
        input (``inputs[1]``) fed from ``model_params``.
        """
        gvar_names = list(model_params.keys())
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + '/Assign')
                      for gvar_name in gvar_names}
        init_values = {
            gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()
        }
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict=feed_dict)

    def fit(self, X, y, n_epochs=100, X_valid=None, y_valid=None):
        """Train the network on ``X`` / ``y``.

        Args:
            X: 2-D array of training inputs (``X.shape[1]`` features).
            y: training labels; the distinct values become ``classes_``.
            n_epochs: maximum number of passes over the training data.
            X_valid, y_valid: optional validation set. When both are given,
                training stops once the validation loss has not improved for
                more than 20 consecutive epochs, and the parameters with the
                best validation loss are restored at the end.

        Returns:
            self
        """
        self.close_session()

        n_inputs = X.shape[1]
        self.classes_ = np.unique(y)
        n_outputs = len(self.classes_)

        # The graph works with class indices 0..n_outputs-1, not raw labels.
        self.class_to_index_ = {label: index
                                for index, label in enumerate(self.classes_)}
        y = np.array([self.class_to_index_[label]
                      for label in y], dtype=np.int32)
        use_validation = X_valid is not None and y_valid is not None
        if use_validation:
            # Validation labels need the same label -> index mapping as y.
            y_valid = np.array([self.class_to_index_[label]
                                for label in y_valid], dtype=np.int32)

        self._graph = tf.Graph()
        with self._graph.as_default():
            self._build_graph(n_inputs, n_outputs)
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        max_checks_without_progress = 20
        checks_without_progress = 0
        best_loss = np.inf
        best_params = None

        # At least one batch, so inputs smaller than batch_size still train.
        n_batches = max(1, len(X) // self.batch_size)
        # Run the batch-norm updates (if any) in the same step as the train op.
        train_fetches = [self._training_op] + list(extra_update_ops)

        self._session = tf.Session(graph=self._graph)
        with self._session.as_default() as sess:
            self._init.run()
            for epoch in range(n_epochs):
                rnd_idx = np.random.permutation(len(X))
                for rnd_indices in np.array_split(rnd_idx, n_batches):
                    X_batch, y_batch = X[rnd_indices], y[rnd_indices]
                    feed_dict = {self._X: X_batch, self._y: y_batch}
                    if self._training is not None:
                        feed_dict[self._training] = True
                    sess.run(train_fetches, feed_dict=feed_dict)

                if use_validation:
                    loss_val, acc_val = sess.run(
                        [self._loss, self._accuracy],
                        feed_dict={self._X: X_valid, self._y: y_valid}
                    )
                    if loss_val < best_loss:
                        # Keep an in-memory snapshot of the best parameters.
                        best_params = self._get_model_params()
                        best_loss = loss_val
                        checks_without_progress = 0
                    else:
                        checks_without_progress += 1
                    print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_val, best_loss, acc_val * 100))
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stopping!")
                        break
                else:
                    # Without a validation set, report metrics on the last batch.
                    loss_train, acc_train = sess.run([self._loss, self._accuracy],
                                                     feed_dict={self._X: X_batch,
                                                                self._y: y_batch})
                    print("{}\tLast training batch loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_train, acc_train * 100))
            if best_params:
                self._restore_model_params(best_params)
            return self

    def predict_proba(self, X):
        """Return the softmax class probabilities for ``X``.

        Raises:
            NotFittedError: if ``fit`` has not created a session yet.
        """
        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" % self.__class__.__name__)
        with self._session.as_default() as sess:
            return self._Y_proba.eval(feed_dict={self._X: X})

    def predict(self, X):
        """Return the most probable class label for each row of ``X``.

        The result is an int32 array with one single-element row per sample.
        """
        class_indices = np.argmax(self.predict_proba(X), axis=1)
        return np.array([[self.classes_[class_index]]
                         for class_index in class_indices], np.int32)

    def save(self, path):
        """Save the current session's variables to ``path`` using the graph's Saver."""
        self._saver.save(self._session, path)

In [49]:
# Train one classifier with the default hyperparameters; passing the
# validation set enables early stopping inside fit().
dnn_clf = DNNClassifier(random_state=42)
dnn_clf.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

0	Validation loss: 0.129281	Best loss: 0.129281	Accuracy: 97.03%
1	Validation loss: 0.085987	Best loss: 0.085987	Accuracy: 97.62%
2	Validation loss: 0.163253	Best loss: 0.085987	Accuracy: 96.87%
3	Validation loss: 0.111381	Best loss: 0.085987	Accuracy: 97.07%
4	Validation loss: 0.130923	Best loss: 0.085987	Accuracy: 96.99%
5	Validation loss: 0.123629	Best loss: 0.085987	Accuracy: 98.28%
6	Validation loss: 0.167125	Best loss: 0.085987	Accuracy: 96.79%
7	Validation loss: 0.123174	Best loss: 0.085987	Accuracy: 97.38%
8	Validation loss: 0.120984	Best loss: 0.085987	Accuracy: 97.34%
9	Validation loss: 0.123353	Best loss: 0.085987	Accuracy: 97.69%
10	Validation loss: 0.557742	Best loss: 0.085987	Accuracy: 94.37%
11	Validation loss: 0.262050	Best loss: 0.085987	Accuracy: 96.83%
12	Validation loss: 5.188450	Best loss: 0.085987	Accuracy: 94.29%
13	Validation loss: 0.392812	Best loss: 0.085987	Accuracy: 96.60%
14	Validation loss: 0.457959	Best loss: 0.085987	Accuracy: 79.32%
15	Validation loss: 

DNNClassifier(activation=<function elu at 0x7fac55848400>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<function variance_scaling_initializer.<locals>._initializer at 0x7fac41c2dbf8>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [50]:
from sklearn.metrics import accuracy_score

# Accuracy of the fitted classifier on the held-out test examples (labels < 5).
y_pred = dnn_clf.predict(X_test1)
accuracy_score(y_test1, y_pred)

0.98131932282545242

In [None]:
from sklearn.model_selection import RandomizedSearchCV

def leaky_relu(alpha=0.1):
    """Build an activation function computing ``max(alpha * z, z)`` element-wise.

    Args:
        alpha: multiplier applied to ``z`` for the first argument of the maximum.

    Returns:
        A callable ``f(z, name=None)`` usable wherever an activation is expected.
    """
    def parameterized_leaky_relu(z, name=None):
        scaled = alpha * z
        return tf.maximum(scaled, z, name=name)
    return parameterized_leaky_relu


# Candidate values sampled by the randomized search.
param_distribs = {
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    # you could also try exploring different numbers of hidden layers, different optimizers, etc.
    #"n_hidden_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
}

# The extra fit arguments are passed to fit() rather than through the
# constructor's `fit_params`, which newer scikit-learn releases no longer accept.
# cv=3 pins the 3-fold split shown in the logged output, since the library
# default has changed across versions.
rnd_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                cv=3, random_state=42, verbose=2)
rnd_search.fit(X_train1, y_train1, X_valid=X_valid1, y_valid=y_valid1, n_epochs=1000)



Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] n_neurons=10, learning_rate=0.05, batch_size=100, activation=<function elu at 0x7fac55848400> 
0	Validation loss: 0.128955	Best loss: 0.128955	Accuracy: 96.52%
1	Validation loss: 0.133592	Best loss: 0.128955	Accuracy: 97.03%
2	Validation loss: 0.112978	Best loss: 0.112978	Accuracy: 96.87%
3	Validation loss: 0.125646	Best loss: 0.112978	Accuracy: 96.56%
4	Validation loss: 0.149656	Best loss: 0.112978	Accuracy: 96.44%
5	Validation loss: 0.242129	Best loss: 0.112978	Accuracy: 94.18%
6	Validation loss: 0.800561	Best loss: 0.112978	Accuracy: 58.60%
7	Validation loss: 0.762057	Best loss: 0.112978	Accuracy: 61.34%
8	Validation loss: 0.714322	Best loss: 0.112978	Accuracy: 61.61%
9	Validation loss: 0.733769	Best loss: 0.112978	Accuracy: 59.77%
10	Validation loss: 0.726264	Best loss: 0.112978	Accuracy: 60.75%
11	Validation loss: 0.707577	Best loss: 0.112978	Accuracy: 61.92%
12	Validation loss: 0.714133	Best loss: 0.112978	Accurac

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.1s remaining:    0.0s


0	Validation loss: 0.154466	Best loss: 0.154466	Accuracy: 95.31%
1	Validation loss: 0.110709	Best loss: 0.110709	Accuracy: 96.99%
2	Validation loss: 0.127785	Best loss: 0.110709	Accuracy: 95.86%
3	Validation loss: 0.123204	Best loss: 0.110709	Accuracy: 96.95%
4	Validation loss: 0.125416	Best loss: 0.110709	Accuracy: 96.09%
5	Validation loss: 0.163190	Best loss: 0.110709	Accuracy: 96.40%
6	Validation loss: 0.129181	Best loss: 0.110709	Accuracy: 96.64%
7	Validation loss: 0.150228	Best loss: 0.110709	Accuracy: 96.25%
8	Validation loss: 0.142415	Best loss: 0.110709	Accuracy: 96.72%
9	Validation loss: 0.138197	Best loss: 0.110709	Accuracy: 96.52%
10	Validation loss: 0.145198	Best loss: 0.110709	Accuracy: 97.15%
11	Validation loss: 0.169079	Best loss: 0.110709	Accuracy: 96.91%
12	Validation loss: 0.123248	Best loss: 0.110709	Accuracy: 96.52%
13	Validation loss: 0.177848	Best loss: 0.110709	Accuracy: 96.09%
14	Validation loss: 0.929578	Best loss: 0.110709	Accuracy: 56.53%
15	Validation loss: 

34	Validation loss: 0.091273	Best loss: 0.065762	Accuracy: 98.16%
35	Validation loss: 0.077564	Best loss: 0.065762	Accuracy: 98.67%
36	Validation loss: 0.075940	Best loss: 0.065762	Accuracy: 98.55%
37	Validation loss: 0.078439	Best loss: 0.065762	Accuracy: 98.67%
38	Validation loss: 0.101520	Best loss: 0.065762	Accuracy: 98.59%
39	Validation loss: 0.095619	Best loss: 0.065762	Accuracy: 98.55%
40	Validation loss: 0.076094	Best loss: 0.065762	Accuracy: 98.55%
41	Validation loss: 0.070937	Best loss: 0.065762	Accuracy: 98.63%
Early stopping!
[CV]  n_neurons=30, learning_rate=0.02, batch_size=500, activation=<function relu at 0x7fac5585b158>, total=   6.3s
[CV] n_neurons=30, learning_rate=0.02, batch_size=500, activation=<function relu at 0x7fac5585b158> 
0	Validation loss: 0.121159	Best loss: 0.121159	Accuracy: 96.68%
1	Validation loss: 0.093012	Best loss: 0.093012	Accuracy: 97.11%
2	Validation loss: 0.084517	Best loss: 0.084517	Accuracy: 97.50%
3	Validation loss: 0.070077	Best loss: 0.070

19	Validation loss: 7.296910	Best loss: 0.539918	Accuracy: 61.88%
20	Validation loss: 7.996144	Best loss: 0.539918	Accuracy: 68.26%
21	Validation loss: 6.707133	Best loss: 0.539918	Accuracy: 63.64%
22	Validation loss: 5.725972	Best loss: 0.539918	Accuracy: 71.46%
23	Validation loss: 3.953042	Best loss: 0.539918	Accuracy: 80.41%
24	Validation loss: 9.402796	Best loss: 0.539918	Accuracy: 59.54%
25	Validation loss: 3.536116	Best loss: 0.539918	Accuracy: 76.51%
26	Validation loss: 3.562427	Best loss: 0.539918	Accuracy: 76.74%
Early stopping!
[CV]  n_neurons=90, learning_rate=0.05, batch_size=50, activation=<function leaky_relu.<locals>.parameterized_leaky_relu at 0x7fac461b2ea0>, total=  17.9s
[CV] n_neurons=70, learning_rate=0.1, batch_size=50, activation=<function leaky_relu.<locals>.parameterized_leaky_relu at 0x7fac3d7e3400> 
0	Validation loss: 1.061083	Best loss: 1.061083	Accuracy: 86.55%
1	Validation loss: 2569379.750000	Best loss: 1.061083	Accuracy: 18.73%
2	Validation loss: 11687.4

20	Validation loss: 0.063725	Best loss: 0.045088	Accuracy: 98.48%
21	Validation loss: 0.069764	Best loss: 0.045088	Accuracy: 98.87%
22	Validation loss: 0.077341	Best loss: 0.045088	Accuracy: 98.32%
23	Validation loss: 0.060363	Best loss: 0.045088	Accuracy: 98.98%
24	Validation loss: 0.068150	Best loss: 0.045088	Accuracy: 98.59%
25	Validation loss: 0.061065	Best loss: 0.045088	Accuracy: 98.67%
Early stopping!
[CV]  n_neurons=120, learning_rate=0.01, batch_size=500, activation=<function leaky_relu.<locals>.parameterized_leaky_relu at 0x7fac461b2ea0>, total=  10.6s
[CV] n_neurons=120, learning_rate=0.01, batch_size=500, activation=<function leaky_relu.<locals>.parameterized_leaky_relu at 0x7fac461b2ea0> 
0	Validation loss: 0.123812	Best loss: 0.123812	Accuracy: 95.93%
1	Validation loss: 0.080252	Best loss: 0.080252	Accuracy: 97.73%
2	Validation loss: 0.066269	Best loss: 0.066269	Accuracy: 97.97%
3	Validation loss: 0.064919	Best loss: 0.064919	Accuracy: 98.05%
4	Validation loss: 0.075170	B

5	Validation loss: 0.045372	Best loss: 0.045372	Accuracy: 98.71%
6	Validation loss: 0.039299	Best loss: 0.039299	Accuracy: 98.63%
7	Validation loss: 0.062581	Best loss: 0.039299	Accuracy: 98.36%
8	Validation loss: 0.075106	Best loss: 0.039299	Accuracy: 98.36%
9	Validation loss: 0.057095	Best loss: 0.039299	Accuracy: 98.63%
10	Validation loss: 0.058863	Best loss: 0.039299	Accuracy: 98.67%
11	Validation loss: 0.066973	Best loss: 0.039299	Accuracy: 98.59%
12	Validation loss: 0.059417	Best loss: 0.039299	Accuracy: 98.75%
13	Validation loss: 0.066265	Best loss: 0.039299	Accuracy: 98.55%
14	Validation loss: 0.056666	Best loss: 0.039299	Accuracy: 98.55%
15	Validation loss: 0.072764	Best loss: 0.039299	Accuracy: 98.40%
16	Validation loss: 0.082888	Best loss: 0.039299	Accuracy: 98.40%
17	Validation loss: 0.081743	Best loss: 0.039299	Accuracy: 98.67%
18	Validation loss: 0.072008	Best loss: 0.039299	Accuracy: 98.59%
19	Validation loss: 0.067972	Best loss: 0.039299	Accuracy: 98.71%
20	Validation l

0	Validation loss: 0.122152	Best loss: 0.122152	Accuracy: 95.93%
1	Validation loss: 0.085569	Best loss: 0.085569	Accuracy: 97.03%
2	Validation loss: 0.070516	Best loss: 0.070516	Accuracy: 98.01%
3	Validation loss: 0.049196	Best loss: 0.049196	Accuracy: 98.48%
4	Validation loss: 0.051054	Best loss: 0.049196	Accuracy: 98.71%
5	Validation loss: 0.045649	Best loss: 0.045649	Accuracy: 98.55%
6	Validation loss: 0.050156	Best loss: 0.045649	Accuracy: 98.40%
7	Validation loss: 0.043207	Best loss: 0.043207	Accuracy: 98.59%
8	Validation loss: 0.060060	Best loss: 0.043207	Accuracy: 98.28%
9	Validation loss: 0.042514	Best loss: 0.042514	Accuracy: 98.94%
10	Validation loss: 0.054234	Best loss: 0.042514	Accuracy: 98.83%
11	Validation loss: 0.049459	Best loss: 0.042514	Accuracy: 98.67%
12	Validation loss: 0.047484	Best loss: 0.042514	Accuracy: 98.94%
13	Validation loss: 0.047237	Best loss: 0.042514	Accuracy: 98.91%
14	Validation loss: 0.073716	Best loss: 0.042514	Accuracy: 98.32%
15	Validation loss: 

37	Validation loss: 1.617562	Best loss: 1.609289	Accuracy: 22.01%
38	Validation loss: 1.620767	Best loss: 1.609289	Accuracy: 20.91%
39	Validation loss: 1.627334	Best loss: 1.609289	Accuracy: 19.27%
40	Validation loss: 1.607883	Best loss: 1.607883	Accuracy: 22.01%
41	Validation loss: 1.634031	Best loss: 1.607883	Accuracy: 22.01%
42	Validation loss: 1.618327	Best loss: 1.607883	Accuracy: 20.91%
43	Validation loss: 1.618746	Best loss: 1.607883	Accuracy: 22.01%
44	Validation loss: 1.621665	Best loss: 1.607883	Accuracy: 19.08%
45	Validation loss: 1.629383	Best loss: 1.607883	Accuracy: 22.01%
46	Validation loss: 1.637658	Best loss: 1.607883	Accuracy: 18.73%
47	Validation loss: 1.609600	Best loss: 1.607883	Accuracy: 22.01%
48	Validation loss: 1.612162	Best loss: 1.607883	Accuracy: 22.01%
49	Validation loss: 1.643133	Best loss: 1.607883	Accuracy: 19.27%
50	Validation loss: 1.611987	Best loss: 1.607883	Accuracy: 22.01%
51	Validation loss: 1.619979	Best loss: 1.607883	Accuracy: 19.08%
52	Validat

4	Validation loss: 0.118091	Best loss: 0.074506	Accuracy: 96.33%
5	Validation loss: 0.120820	Best loss: 0.074506	Accuracy: 97.11%
6	Validation loss: 0.084572	Best loss: 0.074506	Accuracy: 97.77%
7	Validation loss: 0.103725	Best loss: 0.074506	Accuracy: 97.22%
8	Validation loss: 0.098401	Best loss: 0.074506	Accuracy: 97.81%
9	Validation loss: 0.107550	Best loss: 0.074506	Accuracy: 97.38%
10	Validation loss: 0.092459	Best loss: 0.074506	Accuracy: 97.69%
11	Validation loss: 0.132714	Best loss: 0.074506	Accuracy: 96.72%
12	Validation loss: 0.089554	Best loss: 0.074506	Accuracy: 97.65%
13	Validation loss: 0.095365	Best loss: 0.074506	Accuracy: 97.38%
14	Validation loss: 0.095531	Best loss: 0.074506	Accuracy: 97.46%
15	Validation loss: 0.088005	Best loss: 0.074506	Accuracy: 97.62%
16	Validation loss: 0.108920	Best loss: 0.074506	Accuracy: 96.60%
17	Validation loss: 0.102420	Best loss: 0.074506	Accuracy: 97.58%
18	Validation loss: 0.075344	Best loss: 0.074506	Accuracy: 97.97%
19	Validation lo

1	Validation loss: 0.133430	Best loss: 0.133430	Accuracy: 96.25%
2	Validation loss: 0.900963	Best loss: 0.133430	Accuracy: 59.85%
3	Validation loss: 0.987884	Best loss: 0.133430	Accuracy: 73.38%
4	Validation loss: 0.206545	Best loss: 0.133430	Accuracy: 94.49%
5	Validation loss: 0.182459	Best loss: 0.133430	Accuracy: 95.31%
6	Validation loss: 0.153164	Best loss: 0.133430	Accuracy: 96.09%
7	Validation loss: 0.143950	Best loss: 0.133430	Accuracy: 95.90%
8	Validation loss: 0.148917	Best loss: 0.133430	Accuracy: 95.78%
9	Validation loss: 0.215165	Best loss: 0.133430	Accuracy: 95.74%
10	Validation loss: 65.845726	Best loss: 0.133430	Accuracy: 32.60%
11	Validation loss: 0.822857	Best loss: 0.133430	Accuracy: 70.84%
12	Validation loss: 1.676334	Best loss: 0.133430	Accuracy: 65.44%
13	Validation loss: 0.551238	Best loss: 0.133430	Accuracy: 81.24%
14	Validation loss: 0.446490	Best loss: 0.133430	Accuracy: 86.00%
15	Validation loss: 0.488140	Best loss: 0.133430	Accuracy: 83.50%
16	Validation loss

3	Validation loss: 814.012390	Best loss: 11.016781	Accuracy: 91.87%
4	Validation loss: 1838.822144	Best loss: 11.016781	Accuracy: 91.20%
5	Validation loss: 291.084991	Best loss: 11.016781	Accuracy: 96.01%
6	Validation loss: 63.564758	Best loss: 11.016781	Accuracy: 96.68%
7	Validation loss: 2975.746582	Best loss: 11.016781	Accuracy: 94.41%
8	Validation loss: 677.254700	Best loss: 11.016781	Accuracy: 95.74%
9	Validation loss: 706.329712	Best loss: 11.016781	Accuracy: 95.86%
10	Validation loss: 29749.259766	Best loss: 11.016781	Accuracy: 88.70%
11	Validation loss: 2572.912109	Best loss: 11.016781	Accuracy: 95.07%
12	Validation loss: 1533.005737	Best loss: 11.016781	Accuracy: 96.83%
13	Validation loss: 4645.341797	Best loss: 11.016781	Accuracy: 96.13%
14	Validation loss: 1819.164185	Best loss: 11.016781	Accuracy: 93.24%
15	Validation loss: 4135.193848	Best loss: 11.016781	Accuracy: 96.79%
16	Validation loss: 3730.346436	Best loss: 11.016781	Accuracy: 95.62%
17	Validation loss: 1700.175781	

7	Validation loss: 0.168744	Best loss: 0.162146	Accuracy: 95.50%
8	Validation loss: 0.203751	Best loss: 0.162146	Accuracy: 94.92%
9	Validation loss: 0.154449	Best loss: 0.154449	Accuracy: 96.25%
10	Validation loss: 0.139675	Best loss: 0.139675	Accuracy: 96.25%
11	Validation loss: 0.120239	Best loss: 0.120239	Accuracy: 96.76%
12	Validation loss: 0.131207	Best loss: 0.120239	Accuracy: 96.52%
13	Validation loss: 0.121818	Best loss: 0.120239	Accuracy: 96.52%
14	Validation loss: 0.133489	Best loss: 0.120239	Accuracy: 96.40%
15	Validation loss: 0.121398	Best loss: 0.120239	Accuracy: 96.99%
16	Validation loss: 0.123193	Best loss: 0.120239	Accuracy: 96.60%
17	Validation loss: 0.122280	Best loss: 0.120239	Accuracy: 96.79%
18	Validation loss: 0.125126	Best loss: 0.120239	Accuracy: 96.95%
19	Validation loss: 0.139385	Best loss: 0.120239	Accuracy: 97.11%
20	Validation loss: 0.142182	Best loss: 0.120239	Accuracy: 97.22%
21	Validation loss: 0.135142	Best loss: 0.120239	Accuracy: 96.79%
22	Validation

8	Validation loss: 295.340820	Best loss: 1.500923	Accuracy: 86.47%
9	Validation loss: 323.967651	Best loss: 1.500923	Accuracy: 84.48%
10	Validation loss: 45948728.000000	Best loss: 1.500923	Accuracy: 20.45%
11	Validation loss: 439319.625000	Best loss: 1.500923	Accuracy: 21.97%
12	Validation loss: 152390.703125	Best loss: 1.500923	Accuracy: 41.36%
13	Validation loss: 42275.097656	Best loss: 1.500923	Accuracy: 50.31%
14	Validation loss: 40290.539062	Best loss: 1.500923	Accuracy: 44.88%
15	Validation loss: 18259.179688	Best loss: 1.500923	Accuracy: 60.09%
16	Validation loss: 297264.875000	Best loss: 1.500923	Accuracy: 60.52%
17	Validation loss: 335260.875000	Best loss: 1.500923	Accuracy: 60.63%
18	Validation loss: 203246.781250	Best loss: 1.500923	Accuracy: 45.74%
19	Validation loss: 131432.187500	Best loss: 1.500923	Accuracy: 63.53%
20	Validation loss: 54310.191406	Best loss: 1.500923	Accuracy: 65.87%
21	Validation loss: 24229.390625	Best loss: 1.500923	Accuracy: 70.05%
Early stopping!
[

12	Validation loss: 5061.779297	Best loss: 0.212176	Accuracy: 89.64%
13	Validation loss: 4330.624512	Best loss: 0.212176	Accuracy: 92.46%
14	Validation loss: 3852.899902	Best loss: 0.212176	Accuracy: 94.53%
15	Validation loss: 3570.245605	Best loss: 0.212176	Accuracy: 94.18%
16	Validation loss: 3018.388672	Best loss: 0.212176	Accuracy: 94.76%
17	Validation loss: 1975.399780	Best loss: 0.212176	Accuracy: 95.19%
18	Validation loss: 3052.223389	Best loss: 0.212176	Accuracy: 92.06%
19	Validation loss: 2076.764160	Best loss: 0.212176	Accuracy: 95.43%
20	Validation loss: 1235.676514	Best loss: 0.212176	Accuracy: 96.01%
21	Validation loss: 2279.253418	Best loss: 0.212176	Accuracy: 91.13%
22	Validation loss: 1225.415771	Best loss: 0.212176	Accuracy: 95.78%
23	Validation loss: 1773.867676	Best loss: 0.212176	Accuracy: 94.57%
24	Validation loss: 1998.083252	Best loss: 0.212176	Accuracy: 93.78%
25	Validation loss: 1409.886963	Best loss: 0.212176	Accuracy: 95.58%
26	Validation loss: 1702.145020	Be

27	Validation loss: 1.997872	Best loss: 1.700591	Accuracy: 18.73%
28	Validation loss: 2.451671	Best loss: 1.700591	Accuracy: 19.08%
29	Validation loss: 2.782164	Best loss: 1.700591	Accuracy: 20.91%
Early stopping!
[CV]  n_neurons=100, learning_rate=0.05, batch_size=10, activation=<function elu at 0x7fac55848400>, total=  57.3s
[CV] n_neurons=90, learning_rate=0.02, batch_size=500, activation=<function elu at 0x7fac55848400> 
0	Validation loss: 0.146549	Best loss: 0.146549	Accuracy: 94.84%
1	Validation loss: 0.094868	Best loss: 0.094868	Accuracy: 96.87%
2	Validation loss: 0.095423	Best loss: 0.094868	Accuracy: 96.56%
3	Validation loss: 0.064079	Best loss: 0.064079	Accuracy: 98.05%
4	Validation loss: 0.064735	Best loss: 0.064079	Accuracy: 97.89%
5	Validation loss: 0.066312	Best loss: 0.064079	Accuracy: 97.89%
6	Validation loss: 0.063926	Best loss: 0.063926	Accuracy: 98.12%
7	Validation loss: 0.064256	Best loss: 0.063926	Accuracy: 98.08%
8	Validation loss: 0.055586	Best loss: 0.055586	Acc

10	Validation loss: 0.586814	Best loss: 0.179809	Accuracy: 74.71%
11	Validation loss: 1.214266	Best loss: 0.179809	Accuracy: 42.46%
12	Validation loss: 1.195771	Best loss: 0.179809	Accuracy: 42.06%
13	Validation loss: 1.192057	Best loss: 0.179809	Accuracy: 38.90%
14	Validation loss: 1.193223	Best loss: 0.179809	Accuracy: 42.06%
15	Validation loss: 1.186450	Best loss: 0.179809	Accuracy: 42.06%
16	Validation loss: 1.188264	Best loss: 0.179809	Accuracy: 42.06%
17	Validation loss: 1.197730	Best loss: 0.179809	Accuracy: 38.90%
18	Validation loss: 1.192550	Best loss: 0.179809	Accuracy: 42.06%
19	Validation loss: 1.186791	Best loss: 0.179809	Accuracy: 42.06%
20	Validation loss: 1.202383	Best loss: 0.179809	Accuracy: 42.06%
21	Validation loss: 1.188658	Best loss: 0.179809	Accuracy: 38.90%
Early stopping!
[CV]  n_neurons=30, learning_rate=0.05, batch_size=50, activation=<function relu at 0x7fac5585b158>, total=   8.2s
[CV] n_neurons=30, learning_rate=0.05, batch_size=50, activation=<function re

In [None]:
# Show the hyperparameter combination selected by `rnd_search`
# (the search object is fitted earlier in the notebook).
rnd_search.best_params_

In [None]:
# Accuracy on the 0-4 test split. `rnd_search.predict` presumably delegates to
# the best estimator found by the search (requires refit to be enabled — TODO confirm).
y_pred = rnd_search.predict(X_test1)
accuracy_score(y_test1, y_pred)

In [None]:
# Persist the best model; the transfer-learning cells further down reload it
# from this path ("./my_best_mnist_model_0_to_4" and its ".meta" graph file).
rnd_search.best_estimator_.save('./my_best_mnist_model_0_to_4')

In [None]:
# Train a single classifier with fixed hyperparameters
# (presumably the ones picked by the search above — TODO confirm against best_params_).
# The validation set is passed to fit() alongside n_epochs.
dnn_clf = DNNClassifier(activation=leaky_relu(alpha=0.1), batch_size=500, learning_rate=0.01,
                        n_neurons=140, random_state=42)
dnn_clf.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

In [None]:
# Test-set accuracy of `dnn_clf`, the classifier fitted in the previous cell.
y_pred = dnn_clf.predict(X_test1)
accuracy_score(y_test1, y_pred)

In [None]:

from sklearn.model_selection import RandomizedSearchCV

# Search space for the batch-normalized classifier: layer width, batch size,
# learning rate, activation function and batch-norm momentum.
param_distribs = {
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    # you could also try exploring different numbers of hidden layers, different optimizers, etc.
    #"n_hidden_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
    "batch_norm_momentum": [0.9, 0.95, 0.98, 0.99, 0.999],
}

# Sample 50 random combinations from the space above.
rnd_search_bn = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                   random_state=42, verbose=2)
# Fit parameters are forwarded through fit(): the `fit_params` constructor
# argument was deprecated in scikit-learn 0.19 and removed in 0.21.
rnd_search_bn.fit(X_train1, y_train1, X_valid=X_valid1, y_valid=y_valid1, n_epochs=1000)

In [None]:
# Hyperparameter combination selected by the batch-normalization search.
rnd_search_bn.best_params_

In [None]:
# Test-set accuracy of the model selected by the batch-normalization search.
y_pred = rnd_search_bn.predict(X_test1)
accuracy_score(y_test1, y_pred)

In [None]:
# Check for overfitting
y_pred = dnn_clf.predict(X_train1)
accuracy_score(y_train1, y_pred)

In [None]:
# Train a classifier with dropout enabled (dropout_rate=0.5), using the
# validation set passed to fit() alongside n_epochs.
dnn_clf_dropout = DNNClassifier(activation=leaky_relu(alpha=0.1), batch_size=500, learning_rate=0.01,
                                n_neurons=90, random_state=42,
                                dropout_rate=0.5)
dnn_clf_dropout.fit(X_train1, y_train1, n_epochs=1000, X_valid=X_valid1, y_valid=y_valid1)

In [None]:

# Test-set accuracy of the dropout classifier.
y_pred = dnn_clf_dropout.predict(X_test1)
accuracy_score(y_test1, y_pred)

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Search space for the dropout classifier: layer width, batch size,
# learning rate, activation function and dropout rate.
param_distribs = {
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    # you could also try exploring different numbers of hidden layers, different optimizers, etc.
    #"n_hidden_layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
    "dropout_rate": [0.2, 0.3, 0.4, 0.5, 0.6],
}

# Sample 50 random combinations from the space above.
rnd_search_dropout = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                        random_state=42, verbose=2)
# Fit parameters are forwarded through fit(): the `fit_params` constructor
# argument was deprecated in scikit-learn 0.19 and removed in 0.21.
rnd_search_dropout.fit(X_train1, y_train1, X_valid=X_valid1, y_valid=y_valid1, n_epochs=1000)

In [None]:
# Hyperparameter combination selected by the dropout search.
rnd_search_dropout.best_params_

In [None]:
# Test-set accuracy of the model selected by the dropout search.
y_pred = rnd_search_dropout.predict(X_test1)
accuracy_score(y_test1, y_pred)

In [None]:
# Transfer Learning
# Start from an empty default graph, then load the graph structure saved with
# the 0-4 model; import_meta_graph returns a Saver for the imported variables.
reset_graph()

restore_saver = tf.train.import_meta_graph("./my_best_mnist_model_0_to_4.meta")

# Look up the tensors needed for training/evaluation by their saved names.
X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
loss = tf.get_default_graph().get_tensor_by_name("loss:0")
Y_proba = tf.get_default_graph().get_tensor_by_name("Y_proba:0")
# First input of the op that produces Y_proba — presumably the pre-softmax
# logits; verify against the model-building code.
logits = Y_proba.op.inputs[0]
accuracy = tf.get_default_graph().get_tensor_by_name("accuracy:0")

In [None]:
learning_rate = 0.01

# Only the trainable variables under the "logits" scope are handed to the
# optimizer, so every other layer stays frozen during training.
output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="logits")
# Explicit optimizer name — presumably to keep it distinct from the optimizer
# already present in the imported graph (TODO confirm).
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

In [None]:
# A prediction is correct when the true label is the top-1 logit.
# NOTE(review): `accuracy` was already fetched from the imported graph in an
# earlier cell; this rebinds the name to a newly created op.
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

# Created after the new optimizer, so both cover the variables it added.
init = tf.global_variables_initializer()
five_frozen_saver = tf.train.Saver()

In [None]:
# Keep only digits 5-9 for the new task and shift their labels down to 0-4,
# because TensorFlow expects labels from 0 to n_classes - 1
X_train2_full = mnist.train.images[mnist.train.labels >= 5]
y_train2_full = mnist.train.labels[mnist.train.labels >= 5] - 5
X_valid2_full = mnist.validation.images[mnist.validation.labels >= 5]
y_valid2_full = mnist.validation.labels[mnist.validation.labels >= 5] - 5
X_test2 = mnist.test.images[mnist.test.labels >= 5]
y_test2 = mnist.test.labels[mnist.test.labels >= 5] - 5

In [None]:
def sample_n_instances_per_class(X, y, n=100):
    """Keep at most the first `n` instances of every class.

    Classes are visited in the sorted order returned by `np.unique(y)`; within
    a class the original row order is preserved, so the selection is
    deterministic (no random sampling).

    Args:
        X: array of instances, indexable by a boolean mask along axis 0.
        y: array of labels aligned with `X`.
        n: maximum number of instances to keep per class.

    Returns:
        A `(X_subset, y_subset)` tuple with the per-class slices concatenated
        class by class.
    """
    class_masks = [y == label for label in np.unique(y)]
    X_parts = [X[mask][:n] for mask in class_masks]
    y_parts = [y[mask][:n] for mask in class_masks]
    return np.concatenate(X_parts), np.concatenate(y_parts)

In [None]:
# Build deliberately small datasets for the new task: 100 training and
# 30 validation instances per digit.
X_train2, y_train2 = sample_n_instances_per_class(X_train2_full, y_train2_full, n=100)
X_valid2, y_valid2 = sample_n_instances_per_class(X_valid2_full, y_valid2_full, n=30)

In [None]:
import time

n_epochs = 1000
batch_size = 20

# Early stopping: give up once the validation loss has failed to improve for
# more than `max_checks_without_progress` consecutive epochs.
max_checks_without_progress = 20
checks_without_progress = 0
# np.inf is the canonical spelling; the np.infty alias was removed in NumPy 2.0.
best_loss = np.inf

with tf.Session() as sess:
    init.run()
    # Load the weights of the model trained on digits 0-4 ...
    restore_saver.restore(sess, "./my_best_mnist_model_0_to_4")
    # ... then re-initialize the output layer so it is trained from scratch.
    for var in output_layer_vars:
        var.initializer.run()

    t0 = time.time()

    for epoch in range(n_epochs):
        # Shuffle once per epoch and split the permutation into mini-batches.
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            # Checkpoint only on improvement, so the file always holds the best model seen.
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_5_to_9_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

# Evaluate the best checkpoint (not the last epoch) on the test set.
with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

In [None]:
# Handle on the tensor named "hidden5_out" in the imported graph (presumably the
# output of the top hidden layer — confirm against the model-building code).
# The next cell feeds this tensor directly instead of recomputing it from X.
hidden5_out = tf.get_default_graph().get_tensor_by_name("hidden5_out:0")

In [None]:
import time

n_epochs = 1000
batch_size = 20

# Early stopping: give up once the validation loss has failed to improve for
# more than `max_checks_without_progress` consecutive epochs.
max_checks_without_progress = 20
checks_without_progress = 0
# np.inf is the canonical spelling; the np.infty alias was removed in NumPy 2.0.
best_loss = np.inf

with tf.Session() as sess:
    init.run()
    # Load the weights of the model trained on digits 0-4 ...
    restore_saver.restore(sess, "./my_best_mnist_model_0_to_4")
    # ... then re-initialize the output layer so it is trained from scratch.
    for var in output_layer_vars:
        var.initializer.run()

    t0 = time.time()

    # training_op only updates output_layer_vars, so the hidden5_out values for
    # a given input never change: compute them once and reuse them every epoch.
    hidden5_train = hidden5_out.eval(feed_dict={X: X_train2, y: y_train2})
    hidden5_valid = hidden5_out.eval(feed_dict={X: X_valid2, y: y_valid2})

    for epoch in range(n_epochs):
        # Shuffle once per epoch and split the permutation into mini-batches.
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            h5_batch, y_batch = hidden5_train[rnd_indices], y_train2[rnd_indices]
            # Feed the cached activations in place of hidden5_out itself.
            sess.run(training_op, feed_dict={hidden5_out: h5_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={hidden5_out: hidden5_valid, y: y_valid2})
        if loss_val < best_loss:
            # Checkpoint only on improvement, so the file always holds the best model seen.
            save_path = five_frozen_saver.save(sess, "./my_mnist_model_5_to_9_five_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

    t1 = time.time()
    print("Total training time: {:.1f}s".format(t1 - t0))

# Evaluate the best checkpoint on the test set, this time from raw inputs.
with tf.Session() as sess:
    five_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_five_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

In [None]:
# Rebuild the graph from the saved 0-4 model, this time attaching a new output
# layer to the "hidden4_out" tensor instead of reusing the original output path.
reset_graph()

n_outputs = 5

restore_saver = tf.train.import_meta_graph("./my_best_mnist_model_0_to_4.meta")

X = tf.get_default_graph().get_tensor_by_name("X:0")
y = tf.get_default_graph().get_tensor_by_name("y:0")
hidden4_out = tf.get_default_graph().get_tensor_by_name("hidden4_out:0")
# New dense output layer; `he_init` is defined elsewhere in the notebook.
logits = tf.layers.dense(hidden4_out, n_outputs, kernel_initializer=he_init, name="new_logits")
Y_proba = tf.nn.softmax(logits)
# Fresh loss and accuracy ops computed from the new logits.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

In [None]:
learning_rate = 0.01

# Train only the variables of the new output layer ("new_logits" scope);
# all other layers stay frozen.
output_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam2")
training_op = optimizer.minimize(loss, var_list=output_layer_vars)

# Created after the new layer and optimizer, so both cover their variables.
init = tf.global_variables_initializer()
four_frozen_saver = tf.train.Saver()

In [None]:
n_epochs = 1000
batch_size = 20

# Early stopping: give up once the validation loss has failed to improve for
# more than `max_checks_without_progress` consecutive epochs.
max_checks_without_progress = 20
checks_without_progress = 0
# np.inf is the canonical spelling; the np.infty alias was removed in NumPy 2.0.
best_loss = np.inf

with tf.Session() as sess:
    # Initialize everything first, then overwrite the variables covered by
    # restore_saver with the weights of the model trained on digits 0-4.
    init.run()
    restore_saver.restore(sess, "./my_best_mnist_model_0_to_4")

    for epoch in range(n_epochs):
        # Shuffle once per epoch and split the permutation into mini-batches.
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            # Checkpoint only on improvement, so the file always holds the best model seen.
            save_path = four_frozen_saver.save(sess, "./my_mnist_model_5_to_9_four_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

# Evaluate the best checkpoint (not the last epoch) on the test set.
with tf.Session() as sess:
    four_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_four_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

In [None]:
learning_rate = 0.01

# The scope argument is matched as a regular expression: this selects the
# trainable variables whose names start with "hidden3", "hidden4" or
# "new_logits", leaving the remaining layers frozen.
unfrozen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="hidden[34]|new_logits")
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam3")
training_op = optimizer.minimize(loss, var_list=unfrozen_vars)

# Created after the new optimizer, so both cover the variables it added.
init = tf.global_variables_initializer()
two_frozen_saver = tf.train.Saver()

In [None]:

n_epochs = 1000
batch_size = 20

# Early stopping: give up once the validation loss has failed to improve for
# more than `max_checks_without_progress` consecutive epochs.
max_checks_without_progress = 20
checks_without_progress = 0
# np.inf is the canonical spelling; the np.infty alias was removed in NumPy 2.0.
best_loss = np.inf

with tf.Session() as sess:
    # Initialize everything, then continue from the checkpoint written by the
    # four-frozen-layers training run.
    init.run()
    four_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_four_frozen")

    for epoch in range(n_epochs):
        # Shuffle once per epoch and split the permutation into mini-batches.
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            # Checkpoint only on improvement, so the file always holds the best model seen.
            save_path = two_frozen_saver.save(sess, "./my_mnist_model_5_to_9_two_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

# Evaluate the best checkpoint (not the last epoch) on the test set.
with tf.Session() as sess:
    two_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_two_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

In [None]:
learning_rate = 0.01

# No var_list is passed to minimize(), so no layer is frozen explicitly here
# (unlike the previous optimizers, which were restricted to selected variables).
optimizer = tf.train.AdamOptimizer(learning_rate, name="Adam4")
training_op = optimizer.minimize(loss)

# Created after the new optimizer, so both cover the variables it added.
init = tf.global_variables_initializer()
no_frozen_saver = tf.train.Saver()

In [None]:
n_epochs = 1000
batch_size = 20

# Early stopping: give up once the validation loss has failed to improve for
# more than `max_checks_without_progress` consecutive epochs.
max_checks_without_progress = 20
checks_without_progress = 0
# np.inf is the canonical spelling; the np.infty alias was removed in NumPy 2.0.
best_loss = np.inf

with tf.Session() as sess:
    # Initialize everything, then continue from the checkpoint written by the
    # two-frozen-layers training run.
    init.run()
    two_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_two_frozen")

    for epoch in range(n_epochs):
        # Shuffle once per epoch and split the permutation into mini-batches.
        rnd_idx = np.random.permutation(len(X_train2))
        for rnd_indices in np.array_split(rnd_idx, len(X_train2) // batch_size):
            X_batch, y_batch = X_train2[rnd_indices], y_train2[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid2, y: y_valid2})
        if loss_val < best_loss:
            # Checkpoint only on improvement, so the file always holds the best model seen.
            save_path = no_frozen_saver.save(sess, "./my_mnist_model_5_to_9_no_frozen")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

# Evaluate the best checkpoint (not the last epoch) on the test set.
with tf.Session() as sess:
    no_frozen_saver.restore(sess, "./my_mnist_model_5_to_9_no_frozen")
    acc_test = accuracy.eval(feed_dict={X: X_test2, y: y_test2})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

In [None]:
# Train a brand-new 4-hidden-layer DNNClassifier on the small 5-9 dataset,
# without reusing any saved weights (presumably as a baseline for the
# transfer-learning results above).
dnn_clf_5_to_9 = DNNClassifier(n_hidden_layers=4, random_state=42)
dnn_clf_5_to_9.fit(X_train2, y_train2, n_epochs=1000, X_valid=X_valid2, y_valid=y_valid2)

In [None]:
# Test-set accuracy of the classifier trained from scratch on digits 5-9.
y_pred = dnn_clf_5_to_9.predict(X_test2)
accuracy_score(y_test2, y_pred)