In [61]:
import os
import tensorflow as tf
import numpy as np
import math
import matplotlib.pyplot as plt
from tqdm import tnrange, tqdm_notebook

%matplotlib inline

Import the data

In [62]:
def load_cifar10(num_training=49000, num_validation=1000, num_test=10000):
    """
    Fetch the CIFAR-10 dataset from the web and perform preprocessing to prepare
    it.  We pad the training data here for the data augmentation
    """
    # Load the raw CIFAR-10 dataset and use appropriate data types and shapes
    cifar10 = tf.keras.datasets.cifar10.load_data()
    (X_train, y_train), (X_test, y_test) = cifar10
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int32).flatten()
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int32).flatten()

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean pixel and divide by std
    mean_pixel = X_train.mean(axis=(0, 1, 2), keepdims=True)
    std_pixel = X_train.std(axis=(0, 1, 2), keepdims=True)
    X_train = (X_train - mean_pixel) / std_pixel
    X_val = (X_val - mean_pixel) / std_pixel
    X_test = (X_test - mean_pixel) / std_pixel
    
    #Pad the data by 4 on height and width
    paddings = [[0, 0,], [4, 4], [4, 4], [0, 0]]
    X_train = np.pad(X_train, paddings, 'constant')
    X_val = np.pad(X_val, paddings, 'constant')
    X_test = np.pad(X_test, paddings, 'constant')
    
    return X_train, y_train, X_val, y_val, X_test, y_test


# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape, y_train.dtype)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

Train data shape:  (49000, 40, 40, 3)
Train labels shape:  (49000,) int32
Validation data shape:  (1000, 40, 40, 3)
Validation labels shape:  (1000,)
Test data shape:  (10000, 40, 40, 3)
Test labels shape:  (10000,)


Create a basic dataset object

In [63]:
class Dataset(object):
    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y
        
        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        N, B = self.X.shape[0], self.batch_size
        idxs = np.arange(N)
        X = self.X
        if self.shuffle:
            np.random.shuffle(idxs)
            
        return iter((X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))
    
    def __len__(self):
        return len(self.y) // self.batch_size
    

batch_size = 64
train_dset = Dataset(X_train, y_train, batch_size=batch_size, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=batch_size, shuffle=False)
test_dset = Dataset(X_test, y_test, batch_size=batch_size)

Create our neutral network

In [64]:
initializer = tf.contrib.layers.variance_scaling_initializer()

#Helper layer function
def batch_norm_relu_conv2d_drop(inputs, filters, is_training, dropout, stride=1, reg=1e-2):
    inputs = tf.layers.batch_normalization(inputs, training = is_training)
    inputs = tf.nn.relu(inputs)
    inputs = tf.layers.conv2d(inputs, filters, 3, strides=stride, padding="same", kernel_initializer=initializer,
                           kernel_regularizer=tf.keras.regularizers.l2(l=reg))
    if dropout is not None:
        inputs = tf.nn.dropout(inputs, keep_prob=dropout)
    return inputs

def subsample_conv2d(inputs, filters, is_training, stride=2, reg=1e-2):
    return tf.layers.conv2d(inputs, filters, 3, strides=stride, padding="same", kernel_initializer=initializer,
                            kernel_regularizer=tf.keras.regularizers.l2(l=reg))

#Resnet unit
def ResNet_unit(inputs, filters, is_training, dropout, i, j, subsample=False, reg=1e-2):
    with tf.variable_scope(f"conv{i+2}_{j+1}"):
        shortcut = inputs
        stride = 2 if subsample else 1
        
        inputs = batch_norm_relu_conv2d_drop(inputs, filters, is_training, dropout, stride=stride, reg=reg)
        inputs = batch_norm_relu_conv2d_drop(inputs, filters, is_training, dropout, reg=reg)
        
        if subsample:
            paddings = tf.constant([[0,0], [0,0], [0,0], [0, filters // 2]])
            shortcut = tf.pad(shortcut, paddings)
            #reduce image height and width by striding as in resnet paper
            shortcut = shortcut[:, ::2, ::2, :]
    
        return shortcut + inputs
    
def model_ResNetv2(inputs, is_training, total_layers=20, dropout=None, num_classes=10, reg=1e-2):
    num_layers = (total_layers - 2) // 6
    filters = [16, 32, 64]
    if dropout == 1: dropout = None
    
    with tf.variable_scope("data_augmentation"):
        if is_training == True:
            inputs = tf.image.random_flip_left_right(inputs)

            #random crop back to 32x32 (training data padded when preprocessed)
            inputs = tf.random_crop(inputs, [inputs.shape[0], 32, 32, 3])
        else:
            #central crop back to 32x32
            inputs = inputs[:, 4:36, 4:36, :]
    
    #first do a single convolution ResNet_unit with no addition
    with tf.variable_scope("conv1"):
        inputs = batch_norm_relu_conv2d_drop(inputs, filters[0], is_training, dropout, reg=reg)
    
    #now some ResNet units
    
    for i in range(3):
        for j in range(num_layers):
            #don't subsample on first go round
            subsample = i > 0 and j == 0
            inputs = ResNet_unit(inputs, filters[i], is_training,
                                 dropout, i, j, subsample=subsample, reg=reg)
    
    #Finally global average pooling, 10 way FC layer and then output to scores.
    #Global average pooling is same as doing reduce_mean
    inputs = tf.reduce_mean(inputs, axis=[1,2])
    inputs = tf.layers.flatten(inputs)
    scores = tf.layers.dense(inputs, num_classes, kernel_initializer=initializer,
                            kernel_regularizer=tf.keras.regularizers.l2(l=reg))
    return scores 

A small test to check that our neutral network works correctly

In [65]:
def test_model_ResNet_fc():
    """ A small unit test for model_ResNetv2 above. """
    tf.reset_default_graph()

    x = tf.zeros((50, 40, 40, 3))
    scores = model_ResNetv2(x, 1)
        
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        scores_np = sess.run(scores)
        print(scores_np.shape)
        
test_model_ResNet_fc()

(50, 10)


In [66]:
def check_acc_tb(sess, dset, x, scores, is_training, FLAG_print=True):
    """
    Check accuracy on a classification model.
    
    Inputs:
    - sess: A TensorFlow Session that will be used to run the graph
    - dset: A Dataset object on which to check accuracy
    - x: A TensorFlow placeholder Tensor where input images should be fed
    - scores: A TensorFlow Tensor representing the scores output from the
      model; this is the Tensor we will ask TensorFlow to evaluate.
      
    Returns: Accuracy of the model
    """
    num_correct, num_samples = 0, 0
    with tf.name_scope('accuracy'):
        for x_batch, y_batch in dset:
            feed_dict = {x: x_batch, is_training: 0}
            scores_np = sess.run(scores, feed_dict=feed_dict)
            y_pred = scores_np.argmax(axis=1)
            num_samples += x_batch.shape[0]
            num_correct += (y_pred == y_batch).sum()
        acc = float(num_correct) / num_samples
        if FLAG_print == True:
            print('Got %d / %d correct (%.2f%%)' % (num_correct, num_samples, 100 * acc))
    return acc

In [67]:
def check_acc_train(sess, x_batch, y_batch, x, scores, is_training, FLAG_print=True):
    """
    Check accuracy on a classification model from a batch of data.
    
    Inputs:
    - sess: A TensorFlow Session that will be used to run the graph
    - dset: A Dataset object on which to check accuracy
    - x: A TensorFlow placeholder Tensor where input images should be fed
    - scores: A TensorFlow Tensor representing the scores output from the
      model; this is the Tensor we will ask TensorFlow to evaluate.
      
    Returns: Accuracy of the model
    """
    num_correct, num_samples = 0, 0
    with tf.name_scope('accuracy'):
        feed_dict = {x: x_batch, is_training: 0}
        scores_np = sess.run(scores, feed_dict=feed_dict)
        y_pred = scores_np.argmax(axis=1)
        acc = float((y_pred == y_batch).sum()) / x_batch.shape[0]
        if FLAG_print == True:
            print('Got %d / %d correct (%.2f%%)' % (num_correct, num_samples, 100 * acc))
    return acc

In [68]:
def train_part5(model_init_fn, optimizer_init_fn, num_epochs=1, decay_at=None, decay_to=None,
                experiment_name="", restore=False, epoch_num=1):
    """
    Simple training loop for use with models defined using tf.keras. It trains
    a model for one epoch on the CIFAR-10 training set and periodically checks
    accuracy on the CIFAR-10 validation set.
    
    Inputs:
    - model_init_fn: A function that takes no parameters; when called it
      constructs the model we want to train: model = model_init_fn()
    - optimizer_init_fn: A function which takes no parameters; when called it
      constructs the Optimizer object we will use to optimize the model:
      optimizer = optimizer_init_fn()
    - num_epochs: The number of epochs to train for
    - decay_at: Epochs to decay the learning rate
    - decay_by: new_learning_rate := learning_rate * decay_by
    
    Returns: Nothing, but prints progress during training
    """
    tf.reset_default_graph()
    
    # declare placeholders
    x = tf.placeholder(tf.float32, [None, 40, 40, 3])
    y = tf.placeholder(tf.int32, [None])
    is_training = tf.placeholder(tf.bool, name='is_training')

    # Whenever you need to record the loss, feed the mean test accuracy to this placeholder
    with tf.name_scope('error'):
        tf_error_ph = tf.placeholder(tf.float32,shape=None, name='error_summary')
        # Create a scalar summary object for the accuracy so it can be displayed
        tf.summary.scalar('error', tf_error_ph)

    # Use the model function to build the forward pass.
    scores = model_init_fn(x, is_training)

    # Compute the loss
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=scores)
    loss_reg = tf.losses.get_regularization_loss()
    loss = tf.reduce_mean(loss) + loss_reg
    loss_summary = tf.summary.scalar('loss', loss)

    optimizer = optimizer_init_fn()
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        with tf.name_scope('train'):
            train_op = optimizer.minimize(loss)
                
    with tf.Session() as sess:
        
        #Tensorboard, merge all summaries but the error ones
        merged = tf.summary.merge_all(scope="(?!error)")
        merged_error = tf.summary.merge_all(scope="(error)")
        
        log_path = "C:/tmp/logs"
        train_writer = tf.summary.FileWriter(log_path + '/train/' + experiment_name, sess.graph)
        test_writer = tf.summary.FileWriter(log_path + '/test/' + experiment_name)
        
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        if restore:
            saver.restore(sess, f"C:/tmp/save/{experiment_name}.ckpt")
            print(f"Model restored at epoch {epoch_num}.")
        
        t = 0
        for epoch in tnrange(epoch_num, num_epochs+1, leave=False):
            print('\nStarting epoch %d' % (epoch))
            #decay learning rate
            if decay_at != None and epoch in decay_at:
                optimizer.learning_rate = decay_to[decay_at.index(epoch)]
                print("Learning rate has changed to:", optimizer.learning_rate)
            
            for x_np, y_np in tqdm_notebook(train_dset, leave=False):
                feed_dict = {x: x_np, y: y_np, is_training: True}
                summary, loss_np, _ = sess.run([merged, loss, train_op], feed_dict=feed_dict)
                train_writer.add_summary(summary, t)
                
                #check_accuracy and add to tensorboard every 200 steps
                if t % 200 == 0 or t % print_every == 0:
                    FLAG_print=False
                    if t % print_every == 0:
                        print('Iteration %d, loss = %.4f' % (t, loss_np))
                        FLAG_print=True
                    acc_test = check_acc_tb(sess, val_dset, x, scores,
                                            is_training, FLAG_print=FLAG_print)
                    error_test = 1 - acc_test
                    error_train = 1 - check_acc_train(sess, x_np, y_np, x, scores,
                                                   is_training, FLAG_print=False)
                    summ = sess.run(merged_error, feed_dict={tf_error_ph : error_test})
                    test_writer.add_summary(summ, t)
                    train_writer.add_summary(sess.run(merged_error, feed_dict={tf_error_ph : error_train}), t)
                t += 1
            #Save every epoch
            save_path = saver.save(sess, f"C:/tmp/save/{experiment_name}_epoch{epoch}.ckpt")
            print("Model saved in path: %s" % save_path)
                                            
        print('\nEnd of training, loss = %.4f' % (loss_np))
        acc = check_acc_tb(sess, val_dset, x, scores, is_training=is_training, FLAG_print=True)
        error_test = 1 - acc_test
        error_train = 1 - check_acc_train(sess, x_np, y_np, x, scores,
                                                   is_training, FLAG_print=False)
        summ = sess.run(merged_error, feed_dict={tf_error_ph : error_test})
        test_writer.add_summary(summ, t)
        train_writer.add_summary(sess.run(merged_error, feed_dict={tf_error_ph : error_train}), t)
        print()
        
        epoch_save = num_epochs
        # Save the variables to disk.
        save_path = saver.save(sess, f"C:/tmp/save/{experiment_name}.ckpt")
        print("Model saved in path: %s" % save_path)
        return acc

In [9]:
num_epochs = 5
total_layers = 20
dropout_prob = 1
learning_rate = 0.001
reg = 0.001
decay_at=[15]
decay_to=[0.0001]
print_every = 5000

for i in range(10):
    learning_rate = 10 ** np.random.uniform(-4,-2)
    reg = 10 ** np.random.uniform(-5,-2)

    name = (f"CIFAR_ResNet{total_layers}_lr{learning_rate}"
                f"_dp{dropout_prob}_reg{reg}_randomsearch")  #_decay{decay_at}_{decay_to}

    def model_init_fn(inputs, is_training, total_layers=total_layers, reg=reg):
        return model_ResNetv2(inputs, is_training, total_layers=total_layers,
                               dropout=dropout_prob, reg=reg)
    def optimizer_init_fn():
        return tf.train.AdamOptimizer(learning_rate=learning_rate)

    train_part5(model_init_fn, optimizer_init_fn, num_epochs, experiment_name=name, decay_at=decay_at, decay_to=decay_to)


Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 6.5144
Got 119 / 1000 correct (11.90%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.000346252700401509_dp1_reg0.0010062125630823956_randomsearch_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.000346252700401509_dp1_reg0.0010062125630823956_randomsearch_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.000346252700401509_dp1_reg0.0010062125630823956_randomsearch_epoch3.ckpt

Starting epoch 4


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.000346252700401509_dp1_reg0.0010062125630823956_randomsearch_epoch4.ckpt

Starting epoch 5


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.000346252700401509_dp1_reg0.0010062125630823956_randomsearch_epoch5.ckpt

End of training, loss = 1.5736
Got 603 / 1000 correct (60.30%)

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.000346252700401509_dp1_reg0.0010062125630823956_randomsearch.ckpt

Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 4.8870
Got 79 / 1000 correct (7.90%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0018661241972018135_dp1_reg5.565349203416563e-05_randomsearch_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0018661241972018135_dp1_reg5.565349203416563e-05_randomsearch_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0018661241972018135_dp1_reg5.565349203416563e-05_randomsearch_epoch3.ckpt

Starting epoch 4


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0018661241972018135_dp1_reg5.565349203416563e-05_randomsearch_epoch4.ckpt

Starting epoch 5


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0018661241972018135_dp1_reg5.565349203416563e-05_randomsearch_epoch5.ckpt

End of training, loss = 0.6752
Got 677 / 1000 correct (67.70%)

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0018661241972018135_dp1_reg5.565349203416563e-05_randomsearch.ckpt

Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 5.4717
Got 79 / 1000 correct (7.90%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0045884485558024815_dp1_reg1.7566236286428013e-05_randomsearch_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0045884485558024815_dp1_reg1.7566236286428013e-05_randomsearch_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0045884485558024815_dp1_reg1.7566236286428013e-05_randomsearch_epoch3.ckpt

Starting epoch 4


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0045884485558024815_dp1_reg1.7566236286428013e-05_randomsearch_epoch4.ckpt

Starting epoch 5


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0045884485558024815_dp1_reg1.7566236286428013e-05_randomsearch_epoch5.ckpt

End of training, loss = 0.7521
Got 693 / 1000 correct (69.30%)

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0045884485558024815_dp1_reg1.7566236286428013e-05_randomsearch.ckpt

Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 13.6078
Got 88 / 1000 correct (8.80%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00018712893253893968_dp1_reg0.006236215927519998_randomsearch_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00018712893253893968_dp1_reg0.006236215927519998_randomsearch_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00018712893253893968_dp1_reg0.006236215927519998_randomsearch_epoch3.ckpt

Starting epoch 4


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00018712893253893968_dp1_reg0.006236215927519998_randomsearch_epoch4.ckpt

Starting epoch 5


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00018712893253893968_dp1_reg0.006236215927519998_randomsearch_epoch5.ckpt

End of training, loss = 2.5694
Got 529 / 1000 correct (52.90%)

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00018712893253893968_dp1_reg0.006236215927519998_randomsearch.ckpt

Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 4.9436
Got 110 / 1000 correct (11.00%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00092705145748776_dp1_reg3.582106725435376e-05_randomsearch_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00092705145748776_dp1_reg3.582106725435376e-05_randomsearch_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00092705145748776_dp1_reg3.582106725435376e-05_randomsearch_epoch3.ckpt

Starting epoch 4


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00092705145748776_dp1_reg3.582106725435376e-05_randomsearch_epoch4.ckpt

Starting epoch 5


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00092705145748776_dp1_reg3.582106725435376e-05_randomsearch_epoch5.ckpt

End of training, loss = 0.6509
Got 680 / 1000 correct (68.00%)

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00092705145748776_dp1_reg3.582106725435376e-05_randomsearch.ckpt

Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 5.7665
Got 113 / 1000 correct (11.30%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00033897316923009506_dp1_reg0.0001573110340097451_randomsearch_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00033897316923009506_dp1_reg0.0001573110340097451_randomsearch_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00033897316923009506_dp1_reg0.0001573110340097451_randomsearch_epoch3.ckpt

Starting epoch 4


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00033897316923009506_dp1_reg0.0001573110340097451_randomsearch_epoch4.ckpt

Starting epoch 5


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00033897316923009506_dp1_reg0.0001573110340097451_randomsearch_epoch5.ckpt

End of training, loss = 1.0621
Got 617 / 1000 correct (61.70%)

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00033897316923009506_dp1_reg0.0001573110340097451_randomsearch.ckpt

Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 4.9757
Got 117 / 1000 correct (11.70%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00011909811621863726_dp1_reg0.00034430879579679634_randomsearch_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00011909811621863726_dp1_reg0.00034430879579679634_randomsearch_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00011909811621863726_dp1_reg0.00034430879579679634_randomsearch_epoch3.ckpt

Starting epoch 4


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00011909811621863726_dp1_reg0.00034430879579679634_randomsearch_epoch4.ckpt

Starting epoch 5


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00011909811621863726_dp1_reg0.00034430879579679634_randomsearch_epoch5.ckpt

End of training, loss = 1.9971
Got 519 / 1000 correct (51.90%)

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00011909811621863726_dp1_reg0.00034430879579679634_randomsearch.ckpt

Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 16.0536
Got 109 / 1000 correct (10.90%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00045482876571418114_dp1_reg0.008050026927464357_randomsearch_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00045482876571418114_dp1_reg0.008050026927464357_randomsearch_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00045482876571418114_dp1_reg0.008050026927464357_randomsearch_epoch3.ckpt

Starting epoch 4


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00045482876571418114_dp1_reg0.008050026927464357_randomsearch_epoch4.ckpt

Starting epoch 5


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00045482876571418114_dp1_reg0.008050026927464357_randomsearch_epoch5.ckpt

End of training, loss = 1.2270
Got 618 / 1000 correct (61.80%)

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.00045482876571418114_dp1_reg0.008050026927464357_randomsearch.ckpt

Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 7.2936
Got 105 / 1000 correct (10.50%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.009173881787290492_dp1_reg0.0018183002920375408_randomsearch_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.009173881787290492_dp1_reg0.0018183002920375408_randomsearch_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.009173881787290492_dp1_reg0.0018183002920375408_randomsearch_epoch3.ckpt

Starting epoch 4


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.009173881787290492_dp1_reg0.0018183002920375408_randomsearch_epoch4.ckpt

Starting epoch 5


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.009173881787290492_dp1_reg0.0018183002920375408_randomsearch_epoch5.ckpt

End of training, loss = 1.4229
Got 232 / 1000 correct (23.20%)

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.009173881787290492_dp1_reg0.0018183002920375408_randomsearch.ckpt

Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 10.5498
Got 107 / 1000 correct (10.70%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0035683168821889393_dp1_reg0.003993938026655614_randomsearch_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0035683168821889393_dp1_reg0.003993938026655614_randomsearch_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0035683168821889393_dp1_reg0.003993938026655614_randomsearch_epoch3.ckpt

Starting epoch 4


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0035683168821889393_dp1_reg0.003993938026655614_randomsearch_epoch4.ckpt

Starting epoch 5


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0035683168821889393_dp1_reg0.003993938026655614_randomsearch_epoch5.ckpt

End of training, loss = 1.5402
Got 242 / 1000 correct (24.20%)

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.0035683168821889393_dp1_reg0.003993938026655614_randomsearch.ckpt


In [60]:
num_epochs = 20
total_layers = 20
dropout_prob = 1
learning_rate = 0.001
reg = 0.001
decay_at=None
decay_to=None
print_every = 200

name = (f"CIFAR_ResNet{total_layers}_lr{learning_rate}"
            f"_dp{dropout_prob}_reg{reg}_decay{decay_at}_{decay_to}")  

def model_init_fn(inputs, is_training, total_layers=total_layers, reg=reg):
    return model_ResNetv2(inputs, is_training, total_layers=total_layers,
                           dropout=dropout_prob, reg=reg)
def optimizer_init_fn():
    return tf.train.AdamOptimizer(learning_rate=learning_rate)

train_part5(model_init_fn, optimizer_init_fn, num_epochs, experiment_name=name, decay_at=decay_at, decay_to=decay_to)

HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 4.7160
Got 126 / 1000 correct (12.60%)
Iteration 200, loss = 3.0927
Got 271 / 1000 correct (27.10%)
Iteration 400, loss = 2.9107
Got 365 / 1000 correct (36.50%)
Iteration 600, loss = 2.4353
Got 426 / 1000 correct (42.60%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 800, loss = 2.2726
Got 494 / 1000 correct (49.40%)
Iteration 1000, loss = 1.9236
Got 451 / 1000 correct (45.10%)
Iteration 1200, loss = 1.9505
Got 495 / 1000 correct (49.50%)
Iteration 1400, loss = 1.5952
Got 495 / 1000 correct (49.50%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 1600, loss = 1.3738
Got 591 / 1000 correct (59.10%)
Iteration 1800, loss = 1.5485
Got 555 / 1000 correct (55.50%)
Iteration 2000, loss = 1.6307
Got 644 / 1000 correct (64.40%)
Iteration 2200, loss = 1.3290
Got 580 / 1000 correct (58.00%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch3.ckpt

Starting epoch 4


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 2400, loss = 1.3419
Got 590 / 1000 correct (59.00%)
Iteration 2600, loss = 1.2900
Got 571 / 1000 correct (57.10%)
Iteration 2800, loss = 1.2712
Got 582 / 1000 correct (58.20%)
Iteration 3000, loss = 1.3387
Got 661 / 1000 correct (66.10%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch4.ckpt

Starting epoch 5


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 3200, loss = 1.3032
Got 656 / 1000 correct (65.60%)
Iteration 3400, loss = 1.2524
Got 616 / 1000 correct (61.60%)
Iteration 3600, loss = 1.2298
Got 602 / 1000 correct (60.20%)
Iteration 3800, loss = 1.0159
Got 626 / 1000 correct (62.60%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch5.ckpt

Starting epoch 6


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 4000, loss = 1.1980
Got 558 / 1000 correct (55.80%)
Iteration 4200, loss = 0.9223
Got 628 / 1000 correct (62.80%)
Iteration 4400, loss = 0.8907
Got 636 / 1000 correct (63.60%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch6.ckpt

Starting epoch 7


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 4600, loss = 1.1643
Got 595 / 1000 correct (59.50%)
Iteration 4800, loss = 1.0235
Got 720 / 1000 correct (72.00%)
Iteration 5000, loss = 0.9451
Got 700 / 1000 correct (70.00%)
Iteration 5200, loss = 0.8767
Got 607 / 1000 correct (60.70%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch7.ckpt

Starting epoch 8


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 5400, loss = 0.6949
Got 695 / 1000 correct (69.50%)
Iteration 5600, loss = 0.8780
Got 508 / 1000 correct (50.80%)
Iteration 5800, loss = 0.9451
Got 625 / 1000 correct (62.50%)
Iteration 6000, loss = 0.9976
Got 686 / 1000 correct (68.60%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch8.ckpt

Starting epoch 9


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 6200, loss = 0.9410
Got 719 / 1000 correct (71.90%)
Iteration 6400, loss = 0.8788
Got 596 / 1000 correct (59.60%)
Iteration 6600, loss = 0.7414
Got 634 / 1000 correct (63.40%)
Iteration 6800, loss = 0.9207
Got 717 / 1000 correct (71.70%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch9.ckpt

Starting epoch 10


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 7000, loss = 0.7620
Got 708 / 1000 correct (70.80%)
Iteration 7200, loss = 0.8095
Got 618 / 1000 correct (61.80%)
Iteration 7400, loss = 0.6733
Got 651 / 1000 correct (65.10%)
Iteration 7600, loss = 0.8453
Got 667 / 1000 correct (66.70%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch10.ckpt

Starting epoch 11


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 7800, loss = 0.9423
Got 622 / 1000 correct (62.20%)
Iteration 8000, loss = 0.7496
Got 711 / 1000 correct (71.10%)
Iteration 8200, loss = 0.9104
Got 697 / 1000 correct (69.70%)
Iteration 8400, loss = 0.6674
Got 650 / 1000 correct (65.00%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch11.ckpt

Starting epoch 12


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 8600, loss = 0.9363
Got 655 / 1000 correct (65.50%)
Iteration 8800, loss = 0.7933
Got 703 / 1000 correct (70.30%)
Iteration 9000, loss = 1.1443
Got 635 / 1000 correct (63.50%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch12.ckpt

Starting epoch 13


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 9200, loss = 0.6907
Got 626 / 1000 correct (62.60%)
Iteration 9400, loss = 0.7334
Got 616 / 1000 correct (61.60%)
Iteration 9600, loss = 0.8030
Got 712 / 1000 correct (71.20%)
Iteration 9800, loss = 0.8964
Got 590 / 1000 correct (59.00%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch13.ckpt

Starting epoch 14


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 10000, loss = 0.8651
Got 697 / 1000 correct (69.70%)
Iteration 10200, loss = 0.7135
Got 656 / 1000 correct (65.60%)
Iteration 10400, loss = 1.0694
Got 657 / 1000 correct (65.70%)
Iteration 10600, loss = 0.7452
Got 691 / 1000 correct (69.10%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch14.ckpt

Starting epoch 15


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 10800, loss = 0.7247
Got 712 / 1000 correct (71.20%)
Iteration 11000, loss = 0.7966
Got 727 / 1000 correct (72.70%)
Iteration 11200, loss = 0.8547
Got 713 / 1000 correct (71.30%)
Iteration 11400, loss = 1.0107
Got 763 / 1000 correct (76.30%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch15.ckpt

Starting epoch 16


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 11600, loss = 1.0497
Got 688 / 1000 correct (68.80%)
Iteration 11800, loss = 0.8355
Got 693 / 1000 correct (69.30%)
Iteration 12000, loss = 0.7387
Got 693 / 1000 correct (69.30%)
Iteration 12200, loss = 0.9202
Got 727 / 1000 correct (72.70%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch16.ckpt

Starting epoch 17


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 12400, loss = 0.6858
Got 688 / 1000 correct (68.80%)
Iteration 12600, loss = 0.7614
Got 694 / 1000 correct (69.40%)
Iteration 12800, loss = 0.5758
Got 703 / 1000 correct (70.30%)
Iteration 13000, loss = 0.8071
Got 593 / 1000 correct (59.30%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch17.ckpt

Starting epoch 18


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 13200, loss = 0.7769
Got 696 / 1000 correct (69.60%)
Iteration 13400, loss = 0.6824
Got 667 / 1000 correct (66.70%)
Iteration 13600, loss = 0.6905
Got 686 / 1000 correct (68.60%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch18.ckpt

Starting epoch 19


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 13800, loss = 0.7498
Got 681 / 1000 correct (68.10%)
Iteration 14000, loss = 0.8357
Got 696 / 1000 correct (69.60%)
Iteration 14200, loss = 0.7807
Got 714 / 1000 correct (71.40%)
Iteration 14400, loss = 0.7869
Got 685 / 1000 correct (68.50%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch19.ckpt

Starting epoch 20


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 14600, loss = 0.9563
Got 699 / 1000 correct (69.90%)
Iteration 14800, loss = 0.7483
Got 723 / 1000 correct (72.30%)
Iteration 15000, loss = 0.7589
Got 732 / 1000 correct (73.20%)
Iteration 15200, loss = 0.6908
Got 735 / 1000 correct (73.50%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop_epoch20.ckpt

End of training, loss = 0.6451
Got 747 / 1000 correct (74.70%)

Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.001_dp1_reg0.001_nocrop.ckpt


0.747

In [52]:
num_epochs = 20
total_layers = 20
dropout_prob = 1
learning_rate = 0.01
reg = 0.0001
decay_at=[2]
decay_to=[0.1]
print_every = 365

name = (f"CIFAR_ResNet{total_layers}_lr{learning_rate}"
            f"_decay{decay_at}_{decay_to}_reg{reg}_momentum")

def model_init_fn(inputs, is_training, total_layers=total_layers, reg=reg):
    return model_ResNetv2(inputs, is_training, total_layers=total_layers,
                           dropout=dropout_prob, reg=reg)
def optimizer_init_fn():
    return tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)

train_part5(model_init_fn, optimizer_init_fn, num_epochs, experiment_name=name, decay_at=decay_at, decay_to=decay_to)


Starting epoch 1


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 0, loss = 2.9103
Got 79 / 1000 correct (7.90%)
Iteration 365, loss = nan
Got 87 / 1000 correct (8.70%)
Iteration 730, loss = nan
Got 87 / 1000 correct (8.70%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.1_decayNone_None_reg0.0001_momentum_epoch1.ckpt

Starting epoch 2


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

Iteration 1095, loss = nan
Got 87 / 1000 correct (8.70%)
Iteration 1460, loss = nan
Got 87 / 1000 correct (8.70%)
Model saved in path: C:/tmp/save/CIFAR_ResNet20_lr0.1_decayNone_None_reg0.0001_momentum_epoch2.ckpt

Starting epoch 3


HBox(children=(IntProgress(value=0, max=765), HTML(value='')))

KeyboardInterrupt: 