Problem 1. Load MNIST data and scale it (code provided). Estimate a neural network with: epochs =20, batch_size = 100, and one hidden layer with 400 neurons. Measure and report validation accuracy and estimation time.

In [1]:
import numpy as np
import os
import random
random.seed(42)
np.random.seed(42)
import tensorflow as tf
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

  from ._conv import register_converters as _register_converters


In [2]:
reset_graph()
n_inputs = 28 * 28
n_hidden1 = 400
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")

In [3]:
def leaky_relu(z, name=None):
    return tf.maximum(0.01 * z, z, name=name)

hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name="hidden1")

In [4]:
reset_graph()

n_inputs = 28 * 28  
n_hidden1 = 400
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name="hidden1")
    logits = tf.layers.dense(hidden1, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [5]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [6]:
n_epochs = 20
batch_size = 100

import time 
t0 = time.time()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 5 == 0:
            acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
            print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")
t1 = time.time()
print("dnn with 1 layer takes {:.2f}s".format(t1 - t0))

0 Batch accuracy: 0.86 Validation accuracy: 0.8736
5 Batch accuracy: 0.89 Validation accuracy: 0.9216
10 Batch accuracy: 0.92 Validation accuracy: 0.9344
15 Batch accuracy: 0.94 Validation accuracy: 0.9426
dnn with 1 layer takes 16.74s


Problem 2. Try adding new layers: estimate two-layer model: [400,100] and a three-layer model [400,100,25], report time and validation accuracy? 

In [7]:
reset_graph()

n_inputs = 28 * 28  
n_hidden1 = 400
n_hidden2 = 100
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
# Set-up two hiddne layers
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=leaky_relu, name="hidden2")
    
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 20
batch_size = 100

In [8]:
t0 = time.time()
with tf.Session() as sess:
    init.run()    
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 5 == 0:
            acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
            print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")
t1 = time.time()
print("dnn with 2 layers takes {:.2f}s".format(t1 - t0))

0 Batch accuracy: 0.9 Validation accuracy: 0.8794
5 Batch accuracy: 0.92 Validation accuracy: 0.931
10 Batch accuracy: 0.93 Validation accuracy: 0.9494
15 Batch accuracy: 0.94 Validation accuracy: 0.9596
dnn with 2 layer takes 20.42s


In [9]:
reset_graph()

n_inputs = 28 * 28  
n_hidden1 = 400
n_hidden2 = 100
n_hidden3 = 25
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=leaky_relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=leaky_relu, name="hidden3")
    
    logits = tf.layers.dense(hidden3, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 20
batch_size = 100

In [10]:
t0 = time.time()
with tf.Session() as sess:
    init.run()    
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 5 == 0:
            acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
            print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")
t1 = time.time()
print("dnn with 3 layers takes {:.2f}s".format(t1 - t0))

0 Batch accuracy: 0.87 Validation accuracy: 0.8792
5 Batch accuracy: 0.92 Validation accuracy: 0.9392
10 Batch accuracy: 0.93 Validation accuracy: 0.956
15 Batch accuracy: 0.95 Validation accuracy: 0.9656
dnn with 3 layer takes 23.01s


Problem 3: Build a long and narrow five-layer network with [100,80,60,40,20] neurons on the same data. Report accuracy and time.

In [11]:
reset_graph()

n_inputs = 28 * 28  
n_hidden1 = 100
n_hidden2 = 80
n_hidden3 = 60
n_hidden4 = 40
n_hidden5 = 20
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
# Set-up two hiddne layers
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=leaky_relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=leaky_relu, name="hidden3")
    hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=leaky_relu, name="hidden4")
    hidden5 = tf.layers.dense(hidden4, n_hidden5, activation=leaky_relu, name="hidden5")

    logits = tf.layers.dense(hidden5, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 20
batch_size = 100

In [12]:
t0 = time.time()
with tf.Session() as sess:
    init.run()    
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 5 == 0:
            acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
            print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")
t1 = time.time()
print("dnn with 5 layers takes {:.2f}s".format(t1 - t0))

0 Batch accuracy: 0.77 Validation accuracy: 0.7872
5 Batch accuracy: 0.94 Validation accuracy: 0.936
10 Batch accuracy: 0.97 Validation accuracy: 0.957
15 Batch accuracy: 0.95 Validation accuracy: 0.9624
dnn with 5 layers takes 13.89s


Problem 4: Estimate a neural network with: epochs = 20, batch_size = 100. Try to find approximately optimal number of neurons using randomize grid search. Draw 20 random numbers from the [20,1000] neuron space. Measure and report highest validation accuracy and estimation time of the best model. 

In [13]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError
he_init = tf.variance_scaling_initializer()

class DNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, n_hidden_layers=3, n_neurons=100, optimizer_class=tf.train.AdamOptimizer,
                 learning_rate=0.01, batch_size=100, activation=leaky_relu, initializer=he_init,
                 batch_norm_momentum=None, dropout_rate=None, random_state=None):
        """Initialize the DNNClassifier by simply storing all the hyperparameters."""
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons = n_neurons
        self.optimizer_class = optimizer_class
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.initializer = initializer
        self.batch_norm_momentum = batch_norm_momentum
        self.dropout_rate = dropout_rate
        self.random_state = random_state
        self._session = None

    def _dnn(self, inputs):
        """Build the hidden layers, with support for batch normalization and dropout."""
        for layer in range(self.n_hidden_layers):
            if self.dropout_rate:
                inputs = tf.layers.dropout(inputs, self.dropout_rate, training=self._training)
            inputs = tf.layers.dense(inputs, self.n_neurons,
                                     kernel_initializer=self.initializer,
                                     name="hidden%d" % (layer + 1))
            if self.batch_norm_momentum:
                inputs = tf.layers.batch_normalization(inputs, momentum=self.batch_norm_momentum,
                                                       training=self._training)
            inputs = self.activation(inputs, name="hidden%d_out" % (layer + 1))
        return inputs

    def _build_graph(self, n_inputs, n_outputs):
        """Build the same model as earlier"""
        if self.random_state is not None:
            tf.set_random_seed(self.random_state)
            np.random.seed(self.random_state)

        X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
        y = tf.placeholder(tf.int32, shape=(None), name="y")

        if self.batch_norm_momentum or self.dropout_rate:
            self._training = tf.placeholder_with_default(False, shape=(), name='training')
        else:
            self._training = None

        dnn_outputs = self._dnn(X)

        logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="logits")
        Y_proba = tf.nn.softmax(logits, name="Y_proba")

        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                  logits=logits)
        loss = tf.reduce_mean(xentropy, name="loss")

        optimizer = self.optimizer_class(learning_rate=self.learning_rate)
        training_op = optimizer.minimize(loss)

        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        # Make the important operations available easily through instance variables
        self._X, self._y = X, y
        self._Y_proba, self._loss = Y_proba, loss
        self._training_op, self._accuracy = training_op, accuracy
        self._init, self._saver = init, saver

    def close_session(self):
        if self._session:
            self._session.close()

    def _get_model_params(self):
        """Get all variable values (used for early stopping, faster than saving to disk)"""
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}

    def _restore_model_params(self, model_params):
        """Set all variables to the given values (for early stopping, faster than loading from disk)"""
        gvar_names = list(model_params.keys())
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + "/Assign")
                      for gvar_name in gvar_names}
        init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict=feed_dict)

    def fit(self, X, y, n_epochs=100, X_valid=None, y_valid=None):
        """Fit the model to the training set. If X_valid and y_valid are provided, use early stopping."""
        self.close_session()

        # infer n_inputs and n_outputs from the training set.
        n_inputs = X.shape[1]
        self.classes_ = np.unique(y)
        n_outputs = len(self.classes_)
        
        # Translate the labels vector to a vector of sorted class indices, containing
        # integers from 0 to n_outputs - 1.
        # For example, if y is equal to [8, 8, 9, 5, 7, 6, 6, 6], then the sorted class
        # labels (self.classes_) will be equal to [5, 6, 7, 8, 9], and the labels vector
        # will be translated to [3, 3, 4, 0, 2, 1, 1, 1]
        self.class_to_index_ = {label: index
                                for index, label in enumerate(self.classes_)}
        y = np.array([self.class_to_index_[label]
                      for label in y], dtype=np.int32)
        
        self._graph = tf.Graph()
        with self._graph.as_default():
            self._build_graph(n_inputs, n_outputs)
            # extra ops for batch normalization
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # needed in case of early stopping
        max_checks_without_progress = 20
        checks_without_progress = 0
        best_loss = np.infty
        best_params = None
        
        # Now train the model!
        self._session = tf.Session(graph=self._graph)
        with self._session.as_default() as sess:
            self._init.run()
            for epoch in range(n_epochs):
                rnd_idx = np.random.permutation(len(X))
                for rnd_indices in np.array_split(rnd_idx, len(X) // self.batch_size):
                    X_batch, y_batch = X[rnd_indices], y[rnd_indices]
                    feed_dict = {self._X: X_batch, self._y: y_batch}
                    if self._training is not None:
                        feed_dict[self._training] = True
                    sess.run(self._training_op, feed_dict=feed_dict)
                    if extra_update_ops:
                        sess.run(extra_update_ops, feed_dict=feed_dict)
                if X_valid is not None and y_valid is not None:
                    loss_val, acc_val = sess.run([self._loss, self._accuracy],
                                                 feed_dict={self._X: X_valid,
                                                            self._y: y_valid})
                    if loss_val < best_loss:
                        best_params = self._get_model_params()
                        best_loss = loss_val
                        checks_without_progress = 0
                    else:
                        checks_without_progress += 1
                    print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_val, best_loss, acc_val * 100))
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stopping!")
                        break
                else:
                    loss_train, acc_train = sess.run([self._loss, self._accuracy],
                                                     feed_dict={self._X: X_batch,
                                                                self._y: y_batch})
                    print("{}\tLast training batch loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_train, acc_train * 100))
            # If we used early stopping then rollback to the best model found
            if best_params:
                self._restore_model_params(best_params)
            return self

    def predict_proba(self, X):
        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" % self.__class__.__name__)
        with self._session.as_default() as sess:
            return self._Y_proba.eval(feed_dict={self._X: X})

    def predict(self, X):
        class_indices = np.argmax(self.predict_proba(X), axis=1)
        return np.array([[self.classes_[class_index]]
                         for class_index in class_indices], np.int32)

    def save(self, path):
        self._saver.save(self._session, path)

In [14]:
random.seed(42)
np.random.seed(42)
a = [random.randint(200,1000) for _ in range(10)]
print(a)

[854, 314, 225, 959, 481, 450, 428, 342, 954, 304]


In [15]:
from sklearn.model_selection import RandomizedSearchCV

param_distribs = {
    "n_neurons": [854, 314, 225, 959, 481, 450, 428, 342, 954, 304]
}

rnd_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs,
                                random_state=42, verbose=2)
rnd_search.fit(X_train, y_train,n_epochs=20)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] n_neurons=854 ...................................................
0	Last training batch loss: 0.114206	Accuracy: 97.00%
1	Last training batch loss: 0.076683	Accuracy: 97.00%
2	Last training batch loss: 0.049317	Accuracy: 99.00%
3	Last training batch loss: 0.022383	Accuracy: 100.00%
4	Last training batch loss: 0.137289	Accuracy: 95.00%
5	Last training batch loss: 0.106348	Accuracy: 96.00%
6	Last training batch loss: 0.047798	Accuracy: 99.00%
7	Last training batch loss: 2.489443	Accuracy: 97.00%
8	Last training batch loss: 1.386411	Accuracy: 95.00%
9	Last training batch loss: 0.254281	Accuracy: 97.00%
10	Last training batch loss: 0.654841	Accuracy: 95.00%
11	Last training batch loss: 1.117679	Accuracy: 96.00%
12	Last training batch loss: 0.178700	Accuracy: 97.00%
13	Last training batch loss: 0.143237	Accuracy: 99.00%
14	Last training batch loss: 0.242001	Accuracy: 99.00%
15	Last training batch loss: 0.023544	Accuracy: 98.0

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.5min remaining:    0.0s


0	Last training batch loss: 0.128309	Accuracy: 97.00%
1	Last training batch loss: 0.090703	Accuracy: 96.00%
2	Last training batch loss: 0.123286	Accuracy: 96.00%
3	Last training batch loss: 0.643929	Accuracy: 86.00%
4	Last training batch loss: 0.241853	Accuracy: 97.00%
5	Last training batch loss: 1.529591	Accuracy: 87.00%
6	Last training batch loss: 0.187419	Accuracy: 97.00%
7	Last training batch loss: 0.184723	Accuracy: 96.00%
8	Last training batch loss: 0.009511	Accuracy: 99.00%
9	Last training batch loss: 0.069282	Accuracy: 98.00%
10	Last training batch loss: 0.216791	Accuracy: 96.00%
11	Last training batch loss: 0.000948	Accuracy: 100.00%
12	Last training batch loss: 0.337551	Accuracy: 93.00%
13	Last training batch loss: 2.348550	Accuracy: 95.00%
14	Last training batch loss: 1.269276	Accuracy: 98.00%
15	Last training batch loss: 0.750924	Accuracy: 96.00%
16	Last training batch loss: 0.332284	Accuracy: 97.00%
17	Last training batch loss: 0.006970	Accuracy: 99.00%
18	Last training ba

15	Last training batch loss: 0.011743	Accuracy: 100.00%
16	Last training batch loss: 0.004094	Accuracy: 100.00%
17	Last training batch loss: 0.012326	Accuracy: 100.00%
18	Last training batch loss: 0.030771	Accuracy: 99.00%
19	Last training batch loss: 0.020474	Accuracy: 99.00%
[CV] .................................... n_neurons=225, total=  17.5s
[CV] n_neurons=225 ...................................................
0	Last training batch loss: 0.124013	Accuracy: 95.00%
1	Last training batch loss: 0.197797	Accuracy: 96.00%
2	Last training batch loss: 0.095004	Accuracy: 98.00%
3	Last training batch loss: 0.062106	Accuracy: 98.00%
4	Last training batch loss: 0.056698	Accuracy: 98.00%
5	Last training batch loss: 0.008585	Accuracy: 100.00%
6	Last training batch loss: 0.088867	Accuracy: 98.00%
7	Last training batch loss: 0.066122	Accuracy: 99.00%
8	Last training batch loss: 0.006340	Accuracy: 100.00%
9	Last training batch loss: 0.040485	Accuracy: 98.00%
10	Last training batch loss: 0.039221	

7	Last training batch loss: 0.258909	Accuracy: 94.00%
8	Last training batch loss: 0.000573	Accuracy: 100.00%
9	Last training batch loss: 0.022138	Accuracy: 99.00%
10	Last training batch loss: 0.072710	Accuracy: 97.00%
11	Last training batch loss: 0.205445	Accuracy: 95.00%
12	Last training batch loss: 0.002122	Accuracy: 100.00%
13	Last training batch loss: 0.124242	Accuracy: 98.00%
14	Last training batch loss: 0.012504	Accuracy: 100.00%
15	Last training batch loss: 0.020698	Accuracy: 99.00%
16	Last training batch loss: 0.113475	Accuracy: 98.00%
17	Last training batch loss: 0.131405	Accuracy: 97.00%
18	Last training batch loss: 0.476207	Accuracy: 96.00%
19	Last training batch loss: 0.297811	Accuracy: 97.00%
[CV] .................................... n_neurons=481, total=  34.9s
[CV] n_neurons=450 ...................................................
0	Last training batch loss: 0.155601	Accuracy: 95.00%
1	Last training batch loss: 0.104079	Accuracy: 97.00%
2	Last training batch loss: 0.02461

0	Last training batch loss: 0.084406	Accuracy: 98.00%
1	Last training batch loss: 0.063944	Accuracy: 98.00%
2	Last training batch loss: 0.034293	Accuracy: 99.00%
3	Last training batch loss: 0.052329	Accuracy: 98.00%
4	Last training batch loss: 0.043160	Accuracy: 100.00%
5	Last training batch loss: 0.040731	Accuracy: 99.00%
6	Last training batch loss: 0.092132	Accuracy: 97.00%
7	Last training batch loss: 0.082004	Accuracy: 96.00%
8	Last training batch loss: 0.136314	Accuracy: 97.00%
9	Last training batch loss: 0.066306	Accuracy: 99.00%
10	Last training batch loss: 0.085166	Accuracy: 97.00%
11	Last training batch loss: 0.098136	Accuracy: 99.00%
12	Last training batch loss: 0.034814	Accuracy: 98.00%
13	Last training batch loss: 0.111521	Accuracy: 97.00%
14	Last training batch loss: 0.028654	Accuracy: 99.00%
15	Last training batch loss: 0.004516	Accuracy: 100.00%
16	Last training batch loss: 0.157538	Accuracy: 96.00%
17	Last training batch loss: 0.049735	Accuracy: 97.00%
18	Last training b

15	Last training batch loss: 0.054441	Accuracy: 99.00%
16	Last training batch loss: 0.053357	Accuracy: 98.00%
17	Last training batch loss: 0.043452	Accuracy: 99.00%
18	Last training batch loss: 0.062158	Accuracy: 98.00%
19	Last training batch loss: 0.094773	Accuracy: 98.00%
[CV] .................................... n_neurons=304, total=  21.2s
[CV] n_neurons=304 ...................................................
0	Last training batch loss: 0.147091	Accuracy: 98.00%
1	Last training batch loss: 0.133367	Accuracy: 96.00%
2	Last training batch loss: 0.061464	Accuracy: 98.00%
3	Last training batch loss: 0.099957	Accuracy: 97.00%
4	Last training batch loss: 0.051768	Accuracy: 97.00%
5	Last training batch loss: 0.016409	Accuracy: 100.00%
6	Last training batch loss: 0.293977	Accuracy: 95.00%
7	Last training batch loss: 0.029524	Accuracy: 98.00%
8	Last training batch loss: 0.009401	Accuracy: 100.00%
9	Last training batch loss: 0.095306	Accuracy: 98.00%
10	Last training batch loss: 0.037939	Acc

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed: 24.2min finished


0	Last training batch loss: 0.161759	Accuracy: 94.00%
1	Last training batch loss: 0.109485	Accuracy: 97.00%
2	Last training batch loss: 0.132083	Accuracy: 97.00%
3	Last training batch loss: 0.038225	Accuracy: 99.00%
4	Last training batch loss: 0.063873	Accuracy: 97.00%
5	Last training batch loss: 0.065343	Accuracy: 98.00%
6	Last training batch loss: 0.031937	Accuracy: 99.00%
7	Last training batch loss: 0.039774	Accuracy: 99.00%
8	Last training batch loss: 0.018023	Accuracy: 99.00%
9	Last training batch loss: 0.045555	Accuracy: 99.00%
10	Last training batch loss: 0.148827	Accuracy: 96.00%
11	Last training batch loss: 0.037689	Accuracy: 98.00%
12	Last training batch loss: 0.031276	Accuracy: 99.00%
13	Last training batch loss: 0.047324	Accuracy: 98.00%
14	Last training batch loss: 0.001200	Accuracy: 100.00%
15	Last training batch loss: 0.087095	Accuracy: 96.00%
16	Last training batch loss: 0.020970	Accuracy: 100.00%
17	Last training batch loss: 0.141498	Accuracy: 97.00%
18	Last training b

RandomizedSearchCV(cv=None, error_score='raise',
          estimator=DNNClassifier(activation=<function leaky_relu at 0xb18e4e268>,
       batch_norm_momentum=None, batch_size=100, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0x1054146a0>,
       learning_rate=0.01, n_hidden_layers=3, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42),
          fit_params=None, iid=True, n_iter=10, n_jobs=1,
          param_distributions={'n_neurons': [854, 314, 225, 959, 481, 450, 428, 342, 954, 304]},
          pre_dispatch='2*n_jobs', random_state=42, refit=True,
          return_train_score='warn', scoring=None, verbose=2)

In [16]:
from sklearn.metrics import accuracy_score
y_pred = rnd_search.predict(X_valid)
accuracy_score(y_valid, y_pred)

0.9668

In [18]:
rnd_search.best_params_

{'n_neurons': 314}

In [23]:
t0 = time.time()
dnn_clf_bn = DNNClassifier(n_neurons=314, random_state=42)
dnn_clf_bn.fit(X_train, y_train, n_epochs=20)
t1 = time.time()
print("dnn with 314 neuron spaces takes {:.2f}s".format(t1 - t0))

0	Last training batch loss: 0.161759	Accuracy: 94.00%
1	Last training batch loss: 0.109485	Accuracy: 97.00%
2	Last training batch loss: 0.132083	Accuracy: 97.00%
3	Last training batch loss: 0.038225	Accuracy: 99.00%
4	Last training batch loss: 0.063873	Accuracy: 97.00%
5	Last training batch loss: 0.065343	Accuracy: 98.00%
6	Last training batch loss: 0.031937	Accuracy: 99.00%
7	Last training batch loss: 0.039774	Accuracy: 99.00%
8	Last training batch loss: 0.018023	Accuracy: 99.00%
9	Last training batch loss: 0.045555	Accuracy: 99.00%
10	Last training batch loss: 0.148827	Accuracy: 96.00%
11	Last training batch loss: 0.037689	Accuracy: 98.00%
12	Last training batch loss: 0.031276	Accuracy: 99.00%
13	Last training batch loss: 0.047324	Accuracy: 98.00%
14	Last training batch loss: 0.001200	Accuracy: 100.00%
15	Last training batch loss: 0.087095	Accuracy: 96.00%
16	Last training batch loss: 0.020970	Accuracy: 100.00%
17	Last training batch loss: 0.141498	Accuracy: 97.00%
18	Last training b

Problem 5.
Go back to the baseline model 400 neurons one level. What is the time cost of increasing batch size by one observation? . What is the time cost of adding one more epoch? 

In [24]:
reset_graph()

n_inputs = 28 * 28  
n_hidden1 = 400
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=leaky_relu, name="hidden1")
    logits = tf.layers.dense(hidden1, n_outputs, name="outputs")

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [25]:
n_epochs = 21
batch_size = 100

import time 
t0 = time.time()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 5 == 0:
            acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
            print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")
t1 = time.time()
print("dnn with 1 more epoch takes {:.2f}s".format(t1 - t0))

0 Batch accuracy: 0.86 Validation accuracy: 0.8736
5 Batch accuracy: 0.89 Validation accuracy: 0.9216
10 Batch accuracy: 0.92 Validation accuracy: 0.9344
15 Batch accuracy: 0.94 Validation accuracy: 0.9426
20 Batch accuracy: 0.95 Validation accuracy: 0.9482
dnn with 1 more epoch takes 18.22s


In [27]:
n_epochs = 20
batch_size = 101

import time 
t0 = time.time()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 5 == 0:
            acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
            print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")
t1 = time.time()
print("dnn with 1 more batch takes {:.2f}s".format(t1 - t0))

0 Batch accuracy: 0.8811881 Validation accuracy: 0.8724
5 Batch accuracy: 0.9207921 Validation accuracy: 0.9224
10 Batch accuracy: 0.96039605 Validation accuracy: 0.9354
15 Batch accuracy: 0.9405941 Validation accuracy: 0.942
dnn with 1 more batch takes 17.48s


Problem 6. Find change in accuracy and time if you double your batch size to 200? Find change in accuracy and time if increase the number of epochs to 40? Which is the better approach? Report the gain the accuracy and the cost in time for each approach?

In [31]:
n_epochs = 20
batch_size = 200

import time 
t0 = time.time()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 5 == 0:
            acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
            print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")
t1 = time.time()
print("dnn with 200 batchs takes {:.2f}s".format(t1 - t0))

0 Batch accuracy: 0.865 Validation accuracy: 0.8392
5 Batch accuracy: 0.87 Validation accuracy: 0.9062
10 Batch accuracy: 0.91 Validation accuracy: 0.9214
15 Batch accuracy: 0.925 Validation accuracy: 0.9288
dnn with 200 batchs takes 14.13s


In [30]:
n_epochs = 40
batch_size = 100

import time 
t0 = time.time()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 5 == 0:
            acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
            print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")
t1 = time.time()
print("dnn with 40 epochs takes {:.2f}s".format(t1 - t0))

0 Batch accuracy: 0.88 Validation accuracy: 0.8726
5 Batch accuracy: 0.89 Validation accuracy: 0.9228
10 Batch accuracy: 0.96 Validation accuracy: 0.9352
15 Batch accuracy: 0.92 Validation accuracy: 0.9428
20 Batch accuracy: 0.95 Validation accuracy: 0.9498
25 Batch accuracy: 0.91 Validation accuracy: 0.9534
30 Batch accuracy: 0.89 Validation accuracy: 0.9568
35 Batch accuracy: 0.96 Validation accuracy: 0.9602
dnn with 40 epochs takes 33.89s
