In [0]:
import os
import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt

%matplotlib inline

In [60]:
def load_cifar10(num_training=49000, num_validation=1000, num_test=10000):
    """
    Fetch the CIFAR-10 dataset through tf.keras and preprocess it.

    The first `num_training` training images become the training split, the
    `num_validation` images that follow them become the validation split, and
    the first `num_test` test images become the test split. All three splits
    are normalized with the per-channel mean and standard deviation computed
    on the training split only.

    Inputs:
    - num_training: Number of training examples to keep
    - num_validation: Number of validation examples, taken right after the
      training examples
    - num_test: Number of test examples to keep

    Returns: A tuple (X_train, y_train, X_val, y_val, X_test, y_test); the X
    arrays are float32 images and the y arrays are flat int32 label vectors.

    Raises:
    - ValueError if any requested size is negative
    - IndexError if more examples are requested than the dataset provides
    """
    # Slices treat negative bounds as "count from the end", so reject them
    # up front instead of silently returning the wrong split.
    if min(num_training, num_validation, num_test) < 0:
        raise ValueError('num_training, num_validation and num_test must be non-negative')

    # Load the raw CIFAR-10 dataset and use appropriate data types and shapes
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int32).flatten()
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int32).flatten()

    # Slices silently truncate when they run past the end, so check explicitly.
    val_end = num_training + num_validation
    if val_end > X_train.shape[0]:
        raise IndexError(
            'Requested %d training + validation examples but only %d are available'
            % (val_end, X_train.shape[0]))
    if num_test > X_test.shape[0]:
        raise IndexError(
            'Requested %d test examples but only %d are available'
            % (num_test, X_test.shape[0]))

    # Subsample the data. Basic slices are views, so no image data is copied
    # here; the normalization below allocates the final arrays.
    X_val, y_val = X_train[num_training:val_end], y_train[num_training:val_end]
    X_train, y_train = X_train[:num_training], y_train[:num_training]
    X_test, y_test = X_test[:num_test], y_test[:num_test]

    # Normalize the data: subtract the mean pixel and divide by std. The
    # statistics come from the training split so no information leaks from
    # the validation or test data.
    mean_pixel = X_train.mean(axis=(0, 1, 2), keepdims=True)
    std_pixel = X_train.std(axis=(0, 1, 2), keepdims=True)
    X_train = (X_train - mean_pixel) / std_pixel
    X_val = (X_val - mean_pixel) / std_pixel
    X_test = (X_test - mean_pixel) / std_pixel

    return X_train, y_train, X_val, y_val, X_test, y_test

# If there are errors with SSL downloading involving self-signed certificates,
# it may be that your Python version was recently installed on the current machine.
# See: https://github.com/tensorflow/tensorflow/issues/10779
# To fix, run the command: /Applications/Python\ 3.7/Install\ Certificates.command
#   ...replacing paths as necessary.

# Invoke the above function to get our data.
# Axes of an (N, H, W, C) batch that are not the channel axis.
NHW = (0, 1, 2)

# Load every split once; the arrays are reused by the cells below.
X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10()

# Report the shape of each split (plus the dtype of the training labels).
for label, details in (
    ('Train data shape: ', (X_train.shape,)),
    ('Train labels shape: ', (y_train.shape, y_train.dtype)),
    ('Validation data shape: ', (X_val.shape,)),
    ('Validation labels shape: ', (y_val.shape,)),
    ('Test data shape: ', (X_test.shape,)),
    ('Test labels shape: ', (y_test.shape,)),
):
    print(label, *details)

Train data shape:  (49000, 32, 32, 3)
Train labels shape:  (49000,) int32
Validation data shape:  (1000, 32, 32, 3)
Validation labels shape:  (1000,)
Test data shape:  (10000, 32, 32, 3)
Test labels shape:  (10000,)


In [0]:
class Dataset(object):
    """
    Iterable that yields (data, labels) minibatches from two aligned arrays.

    Each call to iter() starts a fresh pass over the data; when shuffling is
    enabled, every pass uses a new random order.
    """
    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y
        
        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        """
        Return an iterator over (X_batch, y_batch) pairs covering the data once.

        The last batch is smaller than batch_size when the number of examples
        is not a multiple of it.
        """
        N, B = self.X.shape[0], self.batch_size
        if not self.shuffle:
            # Contiguous slices keep the original order and avoid copying.
            return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))

        # Draw one permutation per pass and use it for BOTH arrays so that
        # every example stays paired with its own label.
        idxs = np.arange(N)
        np.random.shuffle(idxs)
        return iter((self.X[idxs[i:i+B]], self.y[idxs[i:i+B]]) for i in range(0, N, B))


# Minibatch iterators over the three splits. Only the training split asks for
# shuffling; validation and test are left in their original order.
train_dset = Dataset(
    X_train,
    y_train,
    batch_size=64,
    shuffle=True,
)
val_dset = Dataset(
    X_val,
    y_val,
    batch_size=64,
    shuffle=False,
)
test_dset = Dataset(
    X_test,
    y_test,
    batch_size=64,
    shuffle=False,
)

In [62]:
# A Dataset is a plain iterable of (data, labels) minibatches; peek at the
# first seven of them to confirm their shapes.
for t, (x, y) in enumerate(train_dset):
    print(t, x.shape, y.shape)
    if t >= 6:
        break

0 (64, 32, 32, 3) (64,)
1 (64, 32, 32, 3) (64,)
2 (64, 32, 32, 3) (64,)
3 (64, 32, 32, 3) (64,)
4 (64, 32, 32, 3) (64,)
5 (64, 32, 32, 3) (64,)
6 (64, 32, 32, 3) (64,)


In [0]:
def flatten(x):
    """
    Collapse every dimension after the first into a single one.

    Input:
    - x: TensorFlow Tensor of shape (N, D1, ..., DM)

    Output:
    - TensorFlow Tensor of shape (N, D1 * ... * DM)
    """
    # Read the leading dimension dynamically and let reshape infer the rest.
    batch_size = tf.shape(x)[0]
    flat_shape = (batch_size, -1)
    return tf.reshape(x, flat_shape)

In [0]:
def train_part34(model_init_fn, optimizer_init_fn, num_epochs=1, is_training=False):
    """
    Simple training loop for use with models defined using tf.keras. It trains
    a model for num_epochs epochs on the CIFAR-10 training set and periodically
    checks accuracy on the CIFAR-10 validation set.

    The loop reads the module-level names `device`, `train_dset`, `val_dset`
    and `print_every`; they must be defined before this function is called.

    Inputs:
    - model_init_fn: A function that takes no parameters; when called it
      constructs the model we want to train: model = model_init_fn()
    - optimizer_init_fn: A function which takes no parameters; when called it
      constructs the Optimizer object we will use to optimize the model:
      optimizer = optimizer_init_fn()
    - num_epochs: The number of epochs to train for
    - is_training: Value passed as `training=` to the model on every
      training-set forward pass. The default is False, so pass True to run
      the model in training mode while fitting.

    Returns: Nothing, but prints progress during training
    """
    with tf.device(device):

        # Sparse labels (integer class ids) against the model's outputs. The
        # loss is built with its default settings, so the model is expected
        # to output probabilities rather than raw logits.
        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

        model = model_init_fn()
        optimizer = optimizer_init_fn()

        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

        val_loss = tf.keras.metrics.Mean(name='val_loss')
        val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

        # Global iteration counter; it keeps counting across epochs.
        t = 0
        for epoch in range(num_epochs):

            # Training metrics are running averages over the current epoch only.
            train_loss.reset_states()
            train_accuracy.reset_states()

            for x_np, y_np in train_dset:
                # Record only the forward pass and the loss. Everything else
                # (gradients, the optimizer step, metrics, validation) runs
                # after the tape has stopped recording.
                with tf.GradientTape() as tape:
                    scores = model(x_np, training=is_training)
                    loss = loss_fn(y_np, scores)

                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))

                # Update the metrics
                train_loss.update_state(loss)
                train_accuracy.update_state(y_np, scores)

                if t % print_every == 0:
                    # Every print_every iterations, evaluate on the whole
                    # validation set with the model in inference mode.
                    val_loss.reset_states()
                    val_accuracy.reset_states()
                    for test_x, test_y in val_dset:
                        prediction = model(test_x, training=False)
                        t_loss = loss_fn(test_y, prediction)

                        val_loss.update_state(t_loss)
                        val_accuracy.update_state(test_y, prediction)

                    template = 'Iteration {}, Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'
                    print(template.format(t, epoch+1,
                                          train_loss.result(),
                                          train_accuracy.result()*100,
                                          val_loss.result(),
                                          val_accuracy.result()*100))
                t += 1

Model 1 (batch norm before ReLU): ((CONV-BATCH_NORM-RELU)x2 - MAXPOOL) - ((CONV-BATCH_NORM-RELU)x2 - MAXPOOL) - BATCH_NORM - (DENSE-DROPOUT-BATCH_NORM)x4 - DENSE

In [49]:
class CustomConvNet(tf.keras.Model):
    """
    CIFAR-10 classifier that applies batch norm before each ReLU:

    ((CONV-BATCH_NORM-RELU)x2 - MAXPOOL) x2 - BATCH_NORM - FLATTEN -
    (DENSE-DROPOUT-BATCH_NORM)x4 - DENSE

    The final dense layer uses a softmax activation, so the model outputs
    class probabilities for 10 classes.
    """
    def __init__(self):
        super(CustomConvNet, self).__init__()
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # First convolutional stage: 64 then 128 filters, followed by 2x2 pooling.
        self.conv1 = tf.keras.layers.Conv2D(64, (3,3), strides = 1, padding='same')
        self.norm1 = tf.keras.layers.BatchNormalization()
        self.relu1 = tf.keras.layers.ReLU()
        self.conv2 = tf.keras.layers.Conv2D(128, (5,5), strides = 1, padding='same')
        self.norm2 = tf.keras.layers.BatchNormalization()
        self.relu2 = tf.keras.layers.ReLU()
        self.pool1 = tf.keras.layers.MaxPool2D((2,2))

        # Second convolutional stage: 256 then 512 filters, followed by 2x2 pooling.
        self.conv3 = tf.keras.layers.Conv2D(256, (5,5), strides = 1, padding='same')
        self.norm3 = tf.keras.layers.BatchNormalization()
        self.relu3 = tf.keras.layers.ReLU()
        self.conv4 = tf.keras.layers.Conv2D(512, (5,5), strides = 1, padding='same')
        self.norm4 = tf.keras.layers.BatchNormalization()
        self.relu4 = tf.keras.layers.ReLU()
        self.pool2 = tf.keras.layers.MaxPool2D((2,2))

        self.norm5 = tf.keras.layers.BatchNormalization()
        # Built once here instead of on every forward pass.
        self.flatten = tf.keras.layers.Flatten()

        # Classifier head: four DENSE(relu)-DROPOUT-BATCH_NORM groups.
        self.fc1 = tf.keras.layers.Dense(128,activation = tf.nn.relu)
        self.drop1 = tf.keras.layers.Dropout(0.3)
        self.normf1 = tf.keras.layers.BatchNormalization()
        self.fc2 = tf.keras.layers.Dense(256,activation = tf.nn.relu)
        self.drop2 = tf.keras.layers.Dropout(0.3)
        self.normf2 = tf.keras.layers.BatchNormalization()
        self.fc3 = tf.keras.layers.Dense(512,activation = tf.nn.relu)
        self.drop3 = tf.keras.layers.Dropout(0.3)
        self.normf3 = tf.keras.layers.BatchNormalization()
        self.fc4 = tf.keras.layers.Dense(1024, activation = tf.nn.relu)
        self.drop4 = tf.keras.layers.Dropout(0.3)
        self.normf4 = tf.keras.layers.BatchNormalization()
        self.final = tf.keras.layers.Dense(10,activation = 'softmax')
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
    
    def call(self, input_tensor, training=False):
        """
        Run the forward pass.

        Inputs:
        - input_tensor: Batch of images fed to the first convolution
        - training: Forwarded to every BatchNormalization and Dropout layer so
          they switch between training and inference behavior

        Returns: Softmax class probabilities, one row per input example.
        """
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # The training flag is passed explicitly to every layer whose behavior
        # depends on it, rather than relying on implicit propagation.
        x = self.conv1(input_tensor)
        x = self.norm1(x, training=training)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.norm2(x, training=training)
        x = self.relu2(x)
        x = self.pool1(x)

        x = self.conv3(x)
        x = self.norm3(x, training=training)
        x = self.relu3(x)
        x = self.conv4(x)
        x = self.norm4(x, training=training)
        x = self.relu4(x)
        x = self.pool2(x)

        x = self.norm5(x, training=training)

        x = self.flatten(x)
        x = self.fc1(x)
        x = self.drop1(x, training=training)
        x = self.normf1(x, training=training)
        x = self.fc2(x)
        x = self.drop2(x, training=training)
        x = self.normf2(x, training=training)
        x = self.fc3(x)
        x = self.drop3(x, training=training)
        x = self.normf3(x, training=training)
        x = self.fc4(x)
        x = self.drop4(x, training=training)
        x = self.normf4(x, training=training)
        scores = self.final(x)
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
        
        return scores

# Settings that train_part34 reads as module-level globals.
device = '/device:GPU:0'   # Change this to a CPU/GPU as you wish!
# device = '/cpu:0'        # Change this to a CPU/GPU as you wish!
print_every = 700          # iterations between validation reports
num_epochs = 10

# NOTE: this instance is separate from the one that gets trained below;
# train_part34 builds its own model through model_init_fn.
model = CustomConvNet()


def model_init_fn():
    """Build a fresh, untrained CustomConvNet."""
    return CustomConvNet()


def optimizer_init_fn():
    """Build an Adam optimizer with learning rate 1e-3."""
    return tf.keras.optimizers.Adam(learning_rate=1e-3)


train_part34(
    model_init_fn,
    optimizer_init_fn,
    num_epochs=num_epochs,
    is_training=True,
)

Iteration 0, Epoch 1, Loss: 2.89121675491333, Accuracy: 4.6875, Val Loss: 2.3063817024230957, Val Accuracy: 11.100000381469727
Iteration 700, Epoch 1, Loss: 1.8064565658569336, Accuracy: 37.836570739746094, Val Loss: 1.3094923496246338, Val Accuracy: 54.400001525878906
Iteration 1400, Epoch 2, Loss: 1.1325888633728027, Accuracy: 60.268211364746094, Val Loss: 0.9962601661682129, Val Accuracy: 65.5999984741211
Iteration 2100, Epoch 3, Loss: 0.9067550301551819, Accuracy: 68.83512878417969, Val Loss: 0.8226927518844604, Val Accuracy: 71.10000610351562
Iteration 2800, Epoch 4, Loss: 0.7414024472236633, Accuracy: 74.81050872802734, Val Loss: 0.9280353784561157, Val Accuracy: 68.69999694824219
Iteration 3500, Epoch 5, Loss: 0.6234967708587646, Accuracy: 78.99027252197266, Val Loss: 0.6712948679924011, Val Accuracy: 77.9000015258789
Iteration 4200, Epoch 6, Loss: 0.5162390470504761, Accuracy: 82.65666961669922, Val Loss: 0.7679693698883057, Val Accuracy: 75.5
Iteration 4900, Epoch 7, Loss: 0.4

Model 2 (ReLU before batch norm): ((CONV-RELU-BATCH_NORM)x2 - MAXPOOL) - ((CONV-RELU-BATCH_NORM)x2 - MAXPOOL) - BATCH_NORM - (DENSE-DROPOUT-BATCH_NORM)x4 - DENSE

In [0]:
class CustomConvNet2(tf.keras.Model):
    """
    CIFAR-10 classifier that applies each ReLU before its batch norm:

    ((CONV-RELU-BATCH_NORM)x2 - MAXPOOL) x2 - BATCH_NORM - FLATTEN -
    (DENSE-DROPOUT-BATCH_NORM)x4 - DENSE

    The final dense layer uses a softmax activation, so the model outputs
    class probabilities for 10 classes.
    """
    def __init__(self):
        super(CustomConvNet2, self).__init__()
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # First convolutional stage: 64 then 128 filters, followed by 2x2 pooling.
        self.conv1 = tf.keras.layers.Conv2D(64, (3,3), strides = 1, padding='same')
        self.relu1 = tf.keras.layers.ReLU()
        self.norm1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(128, (5,5), strides = 1, padding='same')
        self.relu2 = tf.keras.layers.ReLU()
        self.norm2 = tf.keras.layers.BatchNormalization()
        self.pool1 = tf.keras.layers.MaxPool2D((2,2))

        # Second convolutional stage: 256 then 512 filters, followed by 2x2 pooling.
        self.conv3 = tf.keras.layers.Conv2D(256, (5,5), strides = 1, padding='same')
        self.relu3 = tf.keras.layers.ReLU()
        self.norm3 = tf.keras.layers.BatchNormalization()
        self.conv4 = tf.keras.layers.Conv2D(512, (5,5), strides = 1, padding='same')
        self.relu4 = tf.keras.layers.ReLU()
        self.norm4 = tf.keras.layers.BatchNormalization()
        self.pool2 = tf.keras.layers.MaxPool2D((2,2))

        self.norm5 = tf.keras.layers.BatchNormalization()
        # Built once here instead of on every forward pass.
        self.flatten = tf.keras.layers.Flatten()

        # Classifier head: four DENSE(relu)-DROPOUT-BATCH_NORM groups.
        self.fc1 = tf.keras.layers.Dense(128,activation = tf.nn.relu)
        self.drop1 = tf.keras.layers.Dropout(0.3)
        self.normf1 = tf.keras.layers.BatchNormalization()
        self.fc2 = tf.keras.layers.Dense(256,activation = tf.nn.relu)
        self.drop2 = tf.keras.layers.Dropout(0.3)
        self.normf2 = tf.keras.layers.BatchNormalization()
        self.fc3 = tf.keras.layers.Dense(512,activation = tf.nn.relu)
        self.drop3 = tf.keras.layers.Dropout(0.3)
        self.normf3 = tf.keras.layers.BatchNormalization()
        self.fc4 = tf.keras.layers.Dense(1024, activation = tf.nn.relu)
        self.drop4 = tf.keras.layers.Dropout(0.3)
        self.normf4 = tf.keras.layers.BatchNormalization()
        self.final = tf.keras.layers.Dense(10,activation = 'softmax')
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
    
    def call(self, input_tensor, training=False):
        """
        Run the forward pass.

        Inputs:
        - input_tensor: Batch of images fed to the first convolution
        - training: Forwarded to every BatchNormalization and Dropout layer so
          they switch between training and inference behavior

        Returns: Softmax class probabilities, one row per input example.
        """
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # The training flag is passed explicitly to every layer whose behavior
        # depends on it, rather than relying on implicit propagation.
        x = self.conv1(input_tensor)
        x = self.relu1(x)
        x = self.norm1(x, training=training)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.norm2(x, training=training)
        x = self.pool1(x)

        x = self.conv3(x)
        x = self.relu3(x)
        x = self.norm3(x, training=training)
        x = self.conv4(x)
        x = self.relu4(x)
        x = self.norm4(x, training=training)
        x = self.pool2(x)

        x = self.norm5(x, training=training)

        x = self.flatten(x)
        x = self.fc1(x)
        x = self.drop1(x, training=training)
        x = self.normf1(x, training=training)
        x = self.fc2(x)
        x = self.drop2(x, training=training)
        x = self.normf2(x, training=training)
        x = self.fc3(x)
        x = self.drop3(x, training=training)
        x = self.normf3(x, training=training)
        x = self.fc4(x)
        x = self.drop4(x, training=training)
        x = self.normf4(x, training=training)
        scores = self.final(x)
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
        
        return scores


In [51]:

# Settings that train_part34 reads as module-level globals.
device = '/device:GPU:0'   # Change this to a CPU/GPU as you wish!
# device = '/cpu:0'        # Change this to a CPU/GPU as you wish!
print_every = 700          # iterations between validation reports
num_epochs = 10

# NOTE: this instance is separate from the one that gets trained below;
# train_part34 builds its own model through model_init_fn.
model = CustomConvNet2()


def model_init_fn():
    """Build a fresh, untrained CustomConvNet2."""
    return CustomConvNet2()


def optimizer_init_fn():
    """Build an Adam optimizer with learning rate 1e-3."""
    return tf.keras.optimizers.Adam(learning_rate=1e-3)


train_part34(
    model_init_fn,
    optimizer_init_fn,
    num_epochs=num_epochs,
    is_training=True,
)

Iteration 0, Epoch 1, Loss: 3.045879364013672, Accuracy: 7.8125, Val Loss: 2.2978439331054688, Val Accuracy: 13.699999809265137
Iteration 700, Epoch 1, Loss: 1.7242741584777832, Accuracy: 40.84789276123047, Val Loss: 1.2221026420593262, Val Accuracy: 57.0
Iteration 1400, Epoch 2, Loss: 1.029404640197754, Accuracy: 64.39960479736328, Val Loss: 0.8286232352256775, Val Accuracy: 70.9000015258789
Iteration 2100, Epoch 3, Loss: 0.7759090662002563, Accuracy: 73.769775390625, Val Loss: 0.8225467205047607, Val Accuracy: 72.29999542236328
Iteration 2800, Epoch 4, Loss: 0.5957204699516296, Accuracy: 80.17208862304688, Val Loss: 0.7292961478233337, Val Accuracy: 77.5
Iteration 3500, Epoch 5, Loss: 0.46390238404273987, Accuracy: 84.58595275878906, Val Loss: 0.6513972282409668, Val Accuracy: 79.69999694824219
Iteration 4200, Epoch 6, Loss: 0.34749653935432434, Accuracy: 88.64555358886719, Val Loss: 0.6391110420227051, Val Accuracy: 80.4000015258789
Iteration 4900, Epoch 7, Loss: 0.25587233901023865

In [0]:
class CustomConvNet3(tf.keras.Model):
    """
    CIFAR-10 classifier with ReLU before batch norm and a two-group dense head:

    ((CONV-RELU-BATCH_NORM)x2 - MAXPOOL) x2 - BATCH_NORM - FLATTEN -
    (DENSE-DROPOUT-BATCH_NORM)x2 - DENSE

    The final dense layer uses a softmax activation, so the model outputs
    class probabilities for 10 classes.
    """
    def __init__(self):
        super(CustomConvNet3, self).__init__()
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # First convolutional stage: 64 then 128 filters, followed by 2x2 pooling.
        self.conv1 = tf.keras.layers.Conv2D(64, (3,3), strides = 1, padding='same')
        self.relu1 = tf.keras.layers.ReLU()
        self.norm1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(128, (5,5), strides = 1, padding='same')
        self.relu2 = tf.keras.layers.ReLU()
        self.norm2 = tf.keras.layers.BatchNormalization()
        self.pool1 = tf.keras.layers.MaxPool2D((2,2))

        # Second convolutional stage: 256 then 512 filters, followed by 2x2 pooling.
        self.conv3 = tf.keras.layers.Conv2D(256, (5,5), strides = 1, padding='same')
        self.relu3 = tf.keras.layers.ReLU()
        self.norm3 = tf.keras.layers.BatchNormalization()
        self.conv4 = tf.keras.layers.Conv2D(512, (5,5), strides = 1, padding='same')
        self.relu4 = tf.keras.layers.ReLU()
        self.norm4 = tf.keras.layers.BatchNormalization()
        self.pool2 = tf.keras.layers.MaxPool2D((2,2))

        self.norm5 = tf.keras.layers.BatchNormalization()
        # Built once here instead of on every forward pass.
        self.flatten = tf.keras.layers.Flatten()

        # Classifier head: two DENSE(relu)-DROPOUT-BATCH_NORM groups.
        self.fc1 = tf.keras.layers.Dense(128,activation = tf.nn.relu)
        self.drop1 = tf.keras.layers.Dropout(0.3)
        self.normf1 = tf.keras.layers.BatchNormalization()
        self.fc2 = tf.keras.layers.Dense(256,activation = tf.nn.relu)
        self.drop2 = tf.keras.layers.Dropout(0.3)
        self.normf2 = tf.keras.layers.BatchNormalization()

        self.final = tf.keras.layers.Dense(10,activation = 'softmax')
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
    
    def call(self, input_tensor, training=False):
        """
        Run the forward pass.

        Inputs:
        - input_tensor: Batch of images fed to the first convolution
        - training: Forwarded to every BatchNormalization and Dropout layer so
          they switch between training and inference behavior

        Returns: Softmax class probabilities, one row per input example.
        """
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # The training flag is passed explicitly to every layer whose behavior
        # depends on it, rather than relying on implicit propagation.
        x = self.conv1(input_tensor)
        x = self.relu1(x)
        x = self.norm1(x, training=training)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.norm2(x, training=training)
        x = self.pool1(x)

        x = self.conv3(x)
        x = self.relu3(x)
        x = self.norm3(x, training=training)
        x = self.conv4(x)
        x = self.relu4(x)
        x = self.norm4(x, training=training)
        x = self.pool2(x)

        x = self.norm5(x, training=training)

        x = self.flatten(x)
        x = self.fc1(x)
        x = self.drop1(x, training=training)
        x = self.normf1(x, training=training)
        x = self.fc2(x)
        x = self.drop2(x, training=training)
        x = self.normf2(x, training=training)

        scores = self.final(x)
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
        
        return scores


In [58]:

# Settings that train_part34 reads as module-level globals.
device = '/device:GPU:0'   # Change this to a CPU/GPU as you wish!
# device = '/cpu:0'        # Change this to a CPU/GPU as you wish!
print_every = 700          # iterations between validation reports
num_epochs = 10

# NOTE: this instance is separate from the one that gets trained below;
# train_part34 builds its own model through model_init_fn.
model = CustomConvNet3()


def model_init_fn():
    """Build a fresh, untrained CustomConvNet3."""
    return CustomConvNet3()


def optimizer_init_fn():
    """Build an Adam optimizer with learning rate 1e-3."""
    return tf.keras.optimizers.Adam(learning_rate=1e-3)


train_part34(
    model_init_fn,
    optimizer_init_fn,
    num_epochs=num_epochs,
    is_training=True,
)

Iteration 0, Epoch 1, Loss: 3.192493438720703, Accuracy: 7.8125, Val Loss: 2.295973777770996, Val Accuracy: 11.300000190734863
Iteration 700, Epoch 1, Loss: 1.4527169466018677, Accuracy: 48.78744888305664, Val Loss: 0.9365317225456238, Val Accuracy: 67.29999542236328
Iteration 1400, Epoch 2, Loss: 0.8530118465423584, Accuracy: 70.24360656738281, Val Loss: 0.7477878928184509, Val Accuracy: 71.4000015258789
Iteration 2100, Epoch 3, Loss: 0.6241518259048462, Accuracy: 78.59732055664062, Val Loss: 0.7351481914520264, Val Accuracy: 74.69999694824219
Iteration 2800, Epoch 4, Loss: 0.447843998670578, Accuracy: 84.87822723388672, Val Loss: 0.624416708946228, Val Accuracy: 79.29999542236328
Iteration 3500, Epoch 5, Loss: 0.3144620656967163, Accuracy: 89.22697448730469, Val Loss: 0.6183016300201416, Val Accuracy: 80.9000015258789
Iteration 4200, Epoch 6, Loss: 0.2203754037618637, Accuracy: 92.38544464111328, Val Loss: 0.6192266941070557, Val Accuracy: 82.4000015258789
Iteration 4900, Epoch 7, Lo