<a href="https://colab.research.google.com/github/phuongnguyen99/CS231n_CNN/blob/main/Learning_Tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Notebook magic (not Python syntax): selects the TensorFlow 2.x line on the hosted runtime.
%tensorflow_version 2.x
import tensorflow as tf
# Ask TensorFlow for the name of the GPU device it can see.
device_name = tf.test.gpu_device_name()
# Anything other than the first GPU's canonical name is treated as "no GPU available".
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [2]:
import os
import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt

%matplotlib inline

# Flip to False to keep every computation on the CPU.
USE_GPU = True

# Device string handed to the `tf.device(...)` scopes used throughout the notebook.
device = '/device:GPU:0' if USE_GPU else '/cpu:0'

# Number of training iterations between two progress reports.
print_every = 100
print('Using device: ', device)

Using device:  /device:GPU:0


In [3]:
def load_cifar10(num_training=49000, num_validation=1000, num_test=10000):
    """
    Fetch CIFAR-10 through tf.keras and return normalized train/val/test splits.

    The official training set is divided into the first `num_training` images
    (training split) and the `num_validation` images that immediately follow
    (validation split). The test split is the first `num_test` images of the
    official test set. Every split is normalized with the per-channel mean and
    standard deviation computed on the training split only.

    Inputs:
    - num_training: Number of images kept for training.
    - num_validation: Number of images kept for validation.
    - num_test: Number of images kept for testing.

    Returns: A tuple (X_train, y_train, X_val, y_val, X_test, y_test) where the
    X arrays are float32 image arrays and the y arrays are flat int32 label
    vectors.

    Raises:
    - ValueError: if a size is negative or asks for more images than the
      downloaded dataset contains.
    """
    # Load the raw CIFAR-10 dataset and use appropriate data types and shapes
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int32).flatten()
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int32).flatten()

    # Slices silently truncate, so reject impossible requests explicitly.
    if min(num_training, num_validation, num_test) < 0:
        raise ValueError('Split sizes must be non-negative')
    if num_training + num_validation > X_train.shape[0]:
        raise ValueError(
            'num_training + num_validation = %d exceeds the %d available training images'
            % (num_training + num_validation, X_train.shape[0]))
    if num_test > X_test.shape[0]:
        raise ValueError(
            'num_test = %d exceeds the %d available test images'
            % (num_test, X_test.shape[0]))

    # Subsample the data. Basic slices avoid the extra copy that indexing with
    # a `range` object (advanced indexing) would make.
    val_end = num_training + num_validation
    X_val, y_val = X_train[num_training:val_end], y_train[num_training:val_end]
    X_train, y_train = X_train[:num_training], y_train[:num_training]
    X_test, y_test = X_test[:num_test], y_test[:num_test]

    # Normalize the data: subtract the mean pixel and divide by std. The
    # statistics are reduced over every axis except the last (channel) axis and
    # come from the training split only, so no validation/test data leaks in.
    mean_pixel = X_train.mean(axis=(0, 1, 2), keepdims=True)
    std_pixel = X_train.std(axis=(0, 1, 2), keepdims=True)
    X_train = (X_train - mean_pixel) / std_pixel
    X_val = (X_val - mean_pixel) / std_pixel
    X_test = (X_test - mean_pixel) / std_pixel

    return X_train, y_train, X_val, y_val, X_test, y_test

# If there are errors with SSL downloading involving self-signed certificates,
# it may be that your Python version was recently installed on the current machine.
# See: https://github.com/tensorflow/tensorflow/issues/10779
# To fix, run the command: /Applications/Python\ 3.7/Install\ Certificates.command
#   ...replacing paths as necessary.

# Invoke the above function to get our data.
NHW = (0, 1, 2)
X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10()

# Report the shape of every split (plus the label dtype for the training labels).
for _caption, _details in (
    ('Train data shape: ', (X_train.shape,)),
    ('Train labels shape: ', (y_train.shape, y_train.dtype)),
    ('Validation data shape: ', (X_val.shape,)),
    ('Validation labels shape: ', (y_val.shape,)),
    ('Test data shape: ', (X_test.shape,)),
    ('Test labels shape: ', (y_test.shape,)),
):
    print(_caption, *_details)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Train data shape:  (49000, 32, 32, 3)
Train labels shape:  (49000,) int32
Validation data shape:  (1000, 32, 32, 3)
Validation labels shape:  (1000,)
Test data shape:  (10000, 32, 32, 3)
Test labels shape:  (10000,)


In [4]:
class Dataset(object):
    """Iterable that serves (data, labels) minibatches from two numpy arrays."""

    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y

        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        """
        Start a new pass over the data.

        Returns an iterator of (X_batch, y_batch) tuples, each holding at most
        `batch_size` examples; the final batch may be smaller. With
        `shuffle=True` a fresh random permutation is drawn every time
        iteration starts, and the same permutation is applied to X and y so
        that examples stay paired with their labels.
        """
        N, B = self.X.shape[0], self.batch_size
        if not self.shuffle:
            # Sequential order: plain slices, no copying.
            return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))

        idxs = np.arange(N)
        np.random.shuffle(idxs)
        # Index through the permutation; previously the shuffled indices were
        # computed but never used, so batches always came out in dataset order.
        return iter((self.X[idxs[i:i+B]], self.y[idxs[i:i+B]])
                    for i in range(0, N, B))


# Shuffling is requested for the training split only; validation and test ask for dataset order.
train_dset = Dataset(X=X_train, y=y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X=X_val, y=y_val, batch_size=64, shuffle=False)
test_dset = Dataset(X=X_test, y=y_test, batch_size=64, shuffle=False)

In [5]:
# We can iterate through a dataset like this:
# Peek at the first minibatches. The stop check runs after printing, so
# batches 0 through 6 are shown.
for t, (x, y) in enumerate(train_dset):
    print(t, x.shape, y.shape)
    if t >= 6:
        break
    

0 (64, 32, 32, 3) (64,)
1 (64, 32, 32, 3) (64,)
2 (64, 32, 32, 3) (64,)
3 (64, 32, 32, 3) (64,)
4 (64, 32, 32, 3) (64,)
5 (64, 32, 32, 3) (64,)
6 (64, 32, 32, 3) (64,)


In [6]:
def flatten(x):
    """
    Collapse every axis after the first into a single axis.

    Input:
    - x: TensorFlow Tensor (or array-like convertible to one) of shape
      (N, D1, ..., DM)

    Output:
    - TensorFlow Tensor of shape (N, D1 * ... * DM)
    """
    # The leading size is read dynamically; -1 lets reshape infer the rest.
    return tf.reshape(x, (tf.shape(x)[0], -1))

In [7]:
def test_flatten():
    """Print a small (2, 3, 4) numpy array and the result of flattening it."""
    # Concrete input built with numpy.
    sample = np.arange(24).reshape((2, 3, 4))
    print('x_np:\n', sample, '\n')
    # flatten() goes through TensorFlow, so the printed result is a tf.Tensor.
    flattened = flatten(sample)
    print('x_flat_np:\n', flattened, '\n')

test_flatten()

x_np:
 [[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]] 

x_flat_np:
 tf.Tensor(
[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]], shape=(2, 12), dtype=int64) 



In [8]:
def two_layer_fc(x, params):
    """
    Forward pass of a bias-free two-layer fully-connected network:
    fully-connected layer -> ReLU -> fully-connected layer.

    Only the forward pass is written out; gradients are left to TensorFlow.

    The minibatch has shape (N, d1, ..., dM) with d1 * ... * dM = D. The hidden
    layer has H units and the output layer produces scores for C classes.

    Inputs:
    - x: A TensorFlow Tensor of shape (N, d1, ..., dM) giving a minibatch of
      input data.
    - params: A two-element sequence [w1, w2] of TensorFlow Tensors, where w1
      has shape (D, H) and w2 has shape (H, C).

    Returns:
    - scores: A TensorFlow Tensor of shape (N, C) giving classification scores
      for the input data x.
    """
    first_weights, second_weights = params
    flat_input = flatten(x)                                      # (N, D)
    hidden = tf.nn.relu(tf.matmul(flat_input, first_weights))    # (N, H)
    return tf.matmul(hidden, second_weights)                     # (N, C)

In [9]:
def two_layer_fc_test():
    """Run two_layer_fc on all-zero inputs/weights and print the score shape."""
    hidden_layer_size = 42

    # Everything created inside the tf.device scope is placed on the
    # configured device (CPU or GPU).
    with tf.device(device):
        images = tf.zeros((64, 32, 32, 3))
        weights = [
            tf.zeros((32 * 32 * 3, hidden_layer_size)),
            tf.zeros((hidden_layer_size, 10)),
        ]

        # Forward pass of the network.
        scores = two_layer_fc(images, weights)

    print(scores.shape)

two_layer_fc_test()

(64, 10)


In [12]:
def three_layer_convnet(x, params):
    """
    Forward pass of a three-layer convolutional network:
    zero-pad 2 -> conv + bias -> ReLU -> zero-pad 1 -> conv + bias -> ReLU
    -> flatten -> fully-connected layer + bias.

    Both convolutions use stride 1 and 'VALID' padding on the explicitly padded
    input, so the spatial size is preserved when the kernels are 5x5 and 3x3
    respectively (the sizes used elsewhere in this notebook).

    Inputs:
    - x: A TensorFlow Tensor of shape (N, H, W, 3) giving a minibatch of images
    - params: A sequence of six TensorFlow Tensors, in this order:
      - conv_w1: shape (KH1, KW1, 3, channel_1), first conv layer weights.
      - conv_b1: shape (channel_1,), first conv layer biases.
      - conv_w2: shape (KH2, KW2, channel_1, channel_2), second conv layer
        weights.
      - conv_b2: shape (channel_2,), second conv layer biases.
      - fc_w: weights of the fully-connected layer; its first dimension must
        match the flattened output of the second conv layer
        (H * W * channel_2 when the spatial size is preserved).
      - fc_b: biases of the fully-connected layer, one per class.

    Returns:
    - scores: A TensorFlow Tensor of shape (N, C) with the class scores.
    """
    conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b = params
    scores = None
    ############################################################################
    # TODO: Implement the forward pass for the three-layer ConvNet.            #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    unit_strides = [1, 1, 1, 1]
    # Padding specs touch only the two spatial axes of the NHWC input.
    pad_by_two = [[0, 0], [2, 2], [2, 2], [0, 0]]
    pad_by_one = [[0, 0], [1, 1], [1, 1], [0, 0]]

    # First conv layer.
    layer1_in = tf.pad(x, pad_by_two, 'CONSTANT')
    layer1_out = tf.nn.relu(
        tf.nn.conv2d(layer1_in, conv_w1, unit_strides, padding='VALID') + conv_b1)

    # Second conv layer.
    layer2_in = tf.pad(layer1_out, pad_by_one, 'CONSTANT')
    layer2_out = tf.nn.relu(
        tf.nn.conv2d(layer2_in, conv_w2, unit_strides, padding='VALID') + conv_b2)

    # Fully-connected layer on the flattened feature maps.
    scores = tf.matmul(flatten(layer2_out), fc_w) + fc_b

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                              END OF YOUR CODE                            #
    ############################################################################
    return scores

In [13]:
def three_layer_convnet_test():
    """Run three_layer_convnet on all-zero tensors and print the score shape."""
    with tf.device(device):
        # Conv layer inputs are 4-dimensional:
        # [batch_size, height, width, channels]
        images = tf.zeros((64, 32, 32, 3))
        params = [
            tf.zeros((5, 5, 3, 6)),         # conv_w1
            tf.zeros((6,)),                 # conv_b1
            tf.zeros((3, 3, 6, 9)),         # conv_w2
            tf.zeros((9,)),                 # conv_b2
            tf.zeros((32 * 32 * 9, 10)),    # fc_w
            tf.zeros((10,)),                # fc_b
        ]
        scores = three_layer_convnet(images, params)

    print('scores_np has shape: ', scores.shape)

three_layer_convnet_test()

scores_np has shape:  (64, 10)


In [14]:
def training_step(model_fn, x, y, params, learning_rate):
    """
    Perform one step of vanilla gradient descent on a minibatch.

    Inputs:
    - model_fn: Callable computing scores = model_fn(x, params).
    - x: Minibatch of input data passed through to model_fn.
    - y: Integer class labels for the minibatch.
    - params: Sequence of tf.Variable objects; each is updated in place.
    - learning_rate: Step size multiplying each gradient.

    Returns: The mean softmax cross-entropy loss of the minibatch, computed
    before the parameters are updated.
    """
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        logits = model_fn(x, params)
        per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y, logits=logits)
        total_loss = tf.reduce_mean(per_example_loss)

    gradients = tape.gradient(total_loss, params)

    # Manual SGD update: w <- w - learning_rate * dL/dw
    for weight, gradient in zip(params, gradients):
        weight.assign_sub(learning_rate * gradient)

    return total_loss

In [15]:
def check_accuracy(dset, x, model_fn, params):
    """
    Measure and print the classification accuracy of a model on a dataset.

    Inputs:
    - dset: Iterable of (x_batch, y_batch) pairs, e.g. a Dataset object
    - x: Not used by this function; kept so existing call sites keep working
    - model_fn: Callable such that model_fn(x_batch, params) returns an object
      whose .numpy() gives a score array with one row per example
    - params: Parameters forwarded unchanged to model_fn

    Returns: Nothing, but prints the accuracy of the model
    """
    correct = 0
    total = 0
    for images, labels in dset:
        # The predicted class is the column with the highest score.
        predicted = model_fn(images, params).numpy().argmax(axis=1)
        total += images.shape[0]
        correct += (predicted == labels).sum()
    accuracy = float(correct) / total
    print('Got %d / %d correct (%.2f%%)' % (correct, total, 100 * accuracy))

In [16]:
def train_part2(model_fn, init_fn, learning_rate):
    """
    Train a model for one pass over the module-level `train_dset`.

    Inputs:
    - model_fn: A Python function that performs the forward pass of the model
      using TensorFlow, with signature scores = model_fn(x, params): x is a
      minibatch of image data, params is the sequence of model weights, and
      scores has shape (N, C).
    - init_fn: A Python function with signature params = init_fn() returning
      the (randomly initialized) model weights as tf.Variable objects.
    - learning_rate: Python float giving the learning rate to use for SGD.

    Returns: Nothing. Every `print_every` iterations the current loss is
    printed, followed by the accuracy on the module-level `val_dset`.
    """
    params = init_fn()  # Fresh weights for this run

    for step, (batch_x, batch_y) in enumerate(train_dset):
        # One SGD update on the current minibatch.
        loss = training_step(model_fn, batch_x, batch_y, params, learning_rate)

        # Only report on every `print_every`-th iteration.
        if step % print_every != 0:
            continue
        print('Iteration %d, loss = %.4f' % (step, loss))
        check_accuracy(val_dset, batch_x, model_fn, params)

In [17]:
def create_matrix_with_kaiming_normal(shape):
    """
    Sample a weight tensor from a normal distribution scaled by sqrt(2 / fan_in).

    fan_in is the product of every dimension except the last one: the number
    of input units for a (D, H) matrix, and KH * KW * C_in for a
    (KH, KW, C_in, C_out) convolution kernel. Other ranks >= 2 follow the same
    rule, which assumes the output-unit axis comes last.

    Inputs:
    - shape: Sequence of at least two integers giving the tensor shape.

    Returns: A TensorFlow Tensor of the requested shape.

    Raises:
    - ValueError: if `shape` has fewer than two dimensions, since fan_in is
      then undefined. (Unsupported ranks used to fail with an unrelated
      UnboundLocalError.)
    """
    if len(shape) < 2:
        raise ValueError(
            'Kaiming initialization needs a shape with at least 2 dimensions, '
            'got %r' % (tuple(shape),))
    fan_in = np.prod(shape[:-1])
    return tf.keras.backend.random_normal(shape) * np.sqrt(2.0 / fan_in)

In [18]:
def two_layer_fc_init():
    """
    Create freshly initialized weights for the two-layer network evaluated by
    `two_layer_fc`, using the `create_matrix_with_kaiming_normal` helper.

    Inputs: None

    Returns: A list of:
    - w1: tf.Variable of shape (3 * 32 * 32, 4000), first layer weights
    - w2: tf.Variable of shape (4000, 10), second layer weights
    """
    input_dim = 3 * 32 * 32
    hidden_layer_size = 4000
    num_scores = 10
    layer_shapes = [(input_dim, hidden_layer_size), (hidden_layer_size, num_scores)]
    return [tf.Variable(create_matrix_with_kaiming_normal(shape))
            for shape in layer_shapes]

learning_rate = 1e-2
train_part2(model_fn=two_layer_fc, init_fn=two_layer_fc_init, learning_rate=learning_rate)

Iteration 0, loss = 3.2506
Got 117 / 1000 correct (11.70%)
Iteration 100, loss = 1.8461
Got 392 / 1000 correct (39.20%)
Iteration 200, loss = 1.5201
Got 384 / 1000 correct (38.40%)
Iteration 300, loss = 1.8027
Got 380 / 1000 correct (38.00%)
Iteration 400, loss = 1.7688
Got 420 / 1000 correct (42.00%)
Iteration 500, loss = 1.8658
Got 442 / 1000 correct (44.20%)
Iteration 600, loss = 1.8008
Got 411 / 1000 correct (41.10%)
Iteration 700, loss = 1.9818
Got 457 / 1000 correct (45.70%)


In [19]:
def three_layer_convnet_init():
    """
    Create freshly initialized parameters for the network evaluated by
    `three_layer_convnet`. Weights come from the
    `create_matrix_with_kaiming_normal` helper; biases start at zero.

    Inputs: None

    Returns a tuple of tf.Variable objects, in this order:
    - conv_w1: shape (5, 5, 3, 32), weights of the first conv layer
    - conv_b1: shape (32,), biases of the first conv layer
    - conv_w2: shape (3, 3, 32, 16), weights of the second conv layer
    - conv_b2: shape (16,), biases of the second conv layer
    - fc_w: shape (32 * 32 * 16, 10), weights of the fully-connected layer
    - fc_b: shape (10,), biases of the fully-connected layer
    """
    params = None
    ############################################################################
    # TODO: Initialize the parameters of the three-layer network.              #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    channel_1, channel_2, num_scores = 32, 16, 10

    conv_w1 = tf.Variable(create_matrix_with_kaiming_normal([5, 5, 3, channel_1]))
    conv_b1 = tf.Variable(tf.zeros([channel_1]))
    conv_w2 = tf.Variable(create_matrix_with_kaiming_normal([3, 3, channel_1, channel_2]))
    conv_b2 = tf.Variable(tf.zeros([channel_2]))
    fc_w = tf.Variable(create_matrix_with_kaiming_normal([32 * 32 * channel_2, num_scores]))
    fc_b = tf.Variable(tf.zeros([num_scores]))

    params = (conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                             END OF YOUR CODE                             #
    ############################################################################
    return params

# The learning rate is passed explicitly here; the module-level `learning_rate` is left untouched.
train_part2(three_layer_convnet, three_layer_convnet_init, learning_rate=3e-3)

Iteration 0, loss = 2.5802
Got 95 / 1000 correct (9.50%)
Iteration 100, loss = 1.8617
Got 350 / 1000 correct (35.00%)
Iteration 200, loss = 1.6357
Got 386 / 1000 correct (38.60%)
Iteration 300, loss = 1.6495
Got 390 / 1000 correct (39.00%)
Iteration 400, loss = 1.7159
Got 442 / 1000 correct (44.20%)
Iteration 500, loss = 1.7644
Got 451 / 1000 correct (45.10%)
Iteration 600, loss = 1.7261
Got 452 / 1000 correct (45.20%)
Iteration 700, loss = 1.7186
Got 473 / 1000 correct (47.30%)


In [20]:
class TwoLayerFC(tf.keras.Model):
    """
    Two-layer fully-connected classifier:
    flatten -> dense (ReLU) -> dense (softmax).

    Because the last layer applies a softmax, the model outputs class
    probabilities rather than raw scores.
    """

    def __init__(self, hidden_size, num_classes):
        """
        Inputs:
        - hidden_size: Number of units in the hidden dense layer
        - num_classes: Number of units in the output dense layer
        """
        super().__init__()
        # One VarianceScaling(scale=2.0) initializer is shared by both dense layers.
        kernel_init = tf.initializers.VarianceScaling(scale=2.0)
        self.fc1 = tf.keras.layers.Dense(
            hidden_size, activation='relu', kernel_initializer=kernel_init)
        self.fc2 = tf.keras.layers.Dense(
            num_classes, activation='softmax', kernel_initializer=kernel_init)
        self.flatten = tf.keras.layers.Flatten()

    def call(self, x, training=False):
        """Forward pass; `training` is accepted but not used by any layer here."""
        return self.fc2(self.fc1(self.flatten(x)))


def test_TwoLayerFC():
    """ Smoke test: push a zero batch through TwoLayerFC and print the output shape. """
    input_size = 50
    hidden_size = 42
    num_classes = 10
    batch = tf.zeros((64, input_size))
    net = TwoLayerFC(hidden_size, num_classes)
    with tf.device(device):
        output = net(batch)
        print(output.shape)

test_TwoLayerFC()

(64, 10)


In [21]:
class ThreeLayerConvNet(tf.keras.Model):
    """
    Three-layer ConvNet:
    zero-pad 2 -> 5x5 conv (ReLU) -> zero-pad 1 -> 3x3 conv (ReLU)
    -> flatten -> dense -> softmax.

    The model outputs class probabilities (the last layer is a softmax).
    """

    def __init__(self, channel_1, channel_2, num_classes):
        """
        Inputs:
        - channel_1: Number of filters in the first conv layer
        - channel_2: Number of filters in the second conv layer
        - num_classes: Number of units in the final dense layer
        """
        super().__init__()
        ########################################################################
        # TODO: Implement the __init__ method for a three-layer ConvNet. You   #
        # should instantiate layer objects to be used in the forward pass.     #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # NOTE(review): scale=3.0 here, while the other models in this notebook
        # use scale=2.0 - confirm this difference is intentional.
        kernel_init = tf.initializers.VarianceScaling(scale=3.0)
        # Both conv layers use 'valid' padding; call() pads the input
        # explicitly before each of them.
        self.conv1 = tf.keras.layers.Conv2D(
            filters=channel_1,
            kernel_size=(5, 5),
            strides=(1, 1),
            activation='relu',
            padding='valid',
            kernel_initializer=kernel_init,
        )
        self.conv2 = tf.keras.layers.Conv2D(
            filters=channel_2,
            kernel_size=(3, 3),
            strides=(1, 1),
            activation='relu',
            padding='valid',
            kernel_initializer=kernel_init,
        )
        self.fc = tf.keras.layers.Dense(num_classes, kernel_initializer=kernel_init)
        self.flatten = tf.keras.layers.Flatten()
        self.softmax = tf.keras.layers.Softmax()

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################

    def call(self, x, training=False):
        """Forward pass; `training` is accepted but not used by any layer here."""
        scores = None
        ########################################################################
        # TODO: Implement the forward pass for a three-layer ConvNet. You      #
        # should use the layer objects defined in the __init__ method.         #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # Zero-pad axes 1 and 2 only, then convolve: 2 before the 5x5 layer,
        # 1 before the 3x3 layer.
        features = self.conv1(tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], 'CONSTANT'))
        features = self.conv2(tf.pad(features, [[0, 0], [1, 1], [1, 1], [0, 0]], 'CONSTANT'))
        scores = self.softmax(self.fc(self.flatten(features)))

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################
        return scores

In [22]:
def test_ThreeLayerConvNet():
    """
    Smoke test: push a zero batch through ThreeLayerConvNet and print the
    output shape, which should be (64, num_classes).
    """
    channel_1, channel_2, num_classes = 12, 8, 10
    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)
    with tf.device(device):
        # The model's padding specs pad axes 1 and 2, and Conv2D defaults to
        # channels-last, so the batch must be (N, H, W, C). The previous
        # (64, 3, 32, 32) input was channels-first and only ran because the
        # padding hid the mismatch.
        x = tf.zeros((64, 32, 32, 3))
        scores = model(x)
        print(scores.shape)

test_ThreeLayerConvNet()

(64, 10)


In [23]:
def train_part34(model_init_fn, optimizer_init_fn, num_epochs=1, is_training=False):
    """
    Simple training loop for models defined using tf.keras. It trains a model
    for `num_epochs` passes over the module-level `train_dset` and, every
    `print_every` iterations, evaluates it on the module-level `val_dset`.

    Inputs:
    - model_init_fn: A function that takes no parameters; when called it
      constructs the model we want to train: model = model_init_fn()
    - optimizer_init_fn: A function which takes no parameters; when called it
      constructs the Optimizer object we will use to optimize the model:
      optimizer = optimizer_init_fn()
    - num_epochs: The number of epochs to train for
    - is_training: Value passed as `training=` when the model is called on
      training batches; validation batches always use training=False

    Returns: Nothing, but prints progress during training
    """
    report = 'Iteration {}, Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'

    with tf.device(device):
        # The loss is built with its default arguments; the models in this
        # notebook end in a softmax, i.e. they output probabilities.
        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

        model = model_init_fn()
        optimizer = optimizer_init_fn()

        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

        val_loss = tf.keras.metrics.Mean(name='val_loss')
        val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

        step = 0  # Global iteration counter; keeps counting across epochs
        for epoch in range(num_epochs):

            # Training metrics are running averages over the current epoch.
            train_loss.reset_states()
            train_accuracy.reset_states()

            for x_np, y_np in train_dset:
                # Record only the forward pass and the loss on the tape.
                with tf.GradientTape() as tape:
                    scores = model(x_np, training=is_training)
                    loss = loss_fn(y_np, scores)

                weights = model.trainable_variables
                gradients = tape.gradient(loss, weights)
                optimizer.apply_gradients(zip(gradients, weights))

                # Update the metrics
                train_loss.update_state(loss)
                train_accuracy.update_state(y_np, scores)

                if step % print_every == 0:
                    # Validation metrics are recomputed from scratch each time.
                    val_loss.reset_states()
                    val_accuracy.reset_states()
                    for val_x, val_y in val_dset:
                        prediction = model(val_x, training=False)
                        val_loss.update_state(loss_fn(val_y, prediction))
                        val_accuracy.update_state(val_y, prediction)

                    print(report.format(step, epoch + 1,
                                        train_loss.result(),
                                        train_accuracy.result() * 100,
                                        val_loss.result(),
                                        val_accuracy.result() * 100))
                step += 1

In [24]:
# Hyperparameters read by the two factory functions below.
hidden_size = 4000
num_classes = 10
learning_rate = 1e-2

def model_init_fn():
    """Build a new TwoLayerFC from the module-level hidden_size / num_classes."""
    return TwoLayerFC(hidden_size=hidden_size, num_classes=num_classes)

def optimizer_init_fn():
    """Build a plain SGD optimizer using the module-level learning_rate."""
    return tf.keras.optimizers.SGD(learning_rate=learning_rate)

train_part34(model_init_fn=model_init_fn, optimizer_init_fn=optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 3.714242935180664, Accuracy: 7.8125, Val Loss: 3.073024272918701, Val Accuracy: 12.700000762939453
Iteration 100, Epoch 1, Loss: 2.2419300079345703, Accuracy: 28.85210418701172, Val Loss: 1.9538294076919556, Val Accuracy: 37.400001525878906
Iteration 200, Epoch 1, Loss: 2.0764684677124023, Accuracy: 32.548194885253906, Val Loss: 1.8857735395431519, Val Accuracy: 41.0
Iteration 300, Epoch 1, Loss: 2.000739097595215, Accuracy: 34.40095520019531, Val Loss: 1.8929636478424072, Val Accuracy: 38.29999923706055
Iteration 400, Epoch 1, Loss: 1.931082010269165, Accuracy: 36.17129135131836, Val Loss: 1.7463773488998413, Val Accuracy: 42.29999923706055
Iteration 500, Epoch 1, Loss: 1.8873789310455322, Accuracy: 37.150699615478516, Val Loss: 1.6892452239990234, Val Accuracy: 42.69999694824219
Iteration 600, Epoch 1, Loss: 1.8548344373703003, Accuracy: 38.09016418457031, Val Loss: 1.7050995826721191, Val Accuracy: 42.099998474121094
Iteration 700, Epoch 1, Loss: 1.828608

In [25]:
# Hyperparameters read by the two factory functions below.
learning_rate = 3e-3
channel_1 = 32
channel_2 = 16
num_classes = 10

def model_init_fn():
    """Build a new ThreeLayerConvNet from the module-level channel/class settings."""
    model = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    model = ThreeLayerConvNet(channel_1=channel_1,
                              channel_2=channel_2,
                              num_classes=num_classes)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return model

def optimizer_init_fn():
    """Build an SGD optimizer with Nesterov momentum 0.9 and the module-level learning_rate."""
    optimizer = None
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=learning_rate,
        momentum=0.9,
        nesterov=True,
    )

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

train_part34(model_init_fn=model_init_fn, optimizer_init_fn=optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 3.9656412601470947, Accuracy: 12.5, Val Loss: 9.084972381591797, Val Accuracy: 10.59999942779541
Iteration 100, Epoch 1, Loss: 2.320930004119873, Accuracy: 26.670793533325195, Val Loss: 1.7614037990570068, Val Accuracy: 38.10000228881836
Iteration 200, Epoch 1, Loss: 1.9998316764831543, Accuracy: 33.3722038269043, Val Loss: 1.5828783512115479, Val Accuracy: 45.69999694824219
Iteration 300, Epoch 1, Loss: 1.8552477359771729, Accuracy: 37.11067199707031, Val Loss: 1.5074924230575562, Val Accuracy: 48.10000228881836
Iteration 400, Epoch 1, Loss: 1.7522270679473877, Accuracy: 40.09507751464844, Val Loss: 1.4361681938171387, Val Accuracy: 49.900001525878906
Iteration 500, Epoch 1, Loss: 1.678235411643982, Accuracy: 42.39021682739258, Val Loss: 1.3802752494812012, Val Accuracy: 50.19999694824219
Iteration 600, Epoch 1, Loss: 1.628665566444397, Accuracy: 43.78119659423828, Val Loss: 1.3327404260635376, Val Accuracy: 53.39999771118164
Iteration 700, Epoch 1, Loss: 1

In [26]:
learning_rate = 1e-2

def model_init_fn():
    """
    Build the two-layer fully-connected classifier with tf.keras.Sequential:
    flatten -> dense(4000, ReLU) -> dense(10, softmax).
    """
    input_shape = (32, 32, 3)
    hidden_layer_size = 4000
    num_classes = 10
    # One VarianceScaling(scale=2.0) initializer is shared by both dense layers.
    kernel_init = tf.initializers.VarianceScaling(scale=2.0)
    return tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=input_shape),
        tf.keras.layers.Dense(hidden_layer_size,
                              activation='relu',
                              kernel_initializer=kernel_init),
        tf.keras.layers.Dense(num_classes,
                              activation='softmax',
                              kernel_initializer=kernel_init),
    ])

def optimizer_init_fn():
    """Build a plain SGD optimizer using the module-level learning_rate."""
    return tf.keras.optimizers.SGD(learning_rate=learning_rate)

train_part34(model_init_fn=model_init_fn, optimizer_init_fn=optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 3.0119621753692627, Accuracy: 7.8125, Val Loss: 3.011324644088745, Val Accuracy: 12.899999618530273
Iteration 100, Epoch 1, Loss: 2.251871347427368, Accuracy: 28.217823028564453, Val Loss: 1.8919093608856201, Val Accuracy: 38.10000228881836
Iteration 200, Epoch 1, Loss: 2.075791358947754, Accuracy: 32.105098724365234, Val Loss: 1.889932632446289, Val Accuracy: 38.400001525878906
Iteration 300, Epoch 1, Loss: 2.003833293914795, Accuracy: 33.97529220581055, Val Loss: 1.8569828271865845, Val Accuracy: 36.599998474121094
Iteration 400, Epoch 1, Loss: 1.9311048984527588, Accuracy: 35.816707611083984, Val Loss: 1.7388664484024048, Val Accuracy: 41.5
Iteration 500, Epoch 1, Loss: 1.8866020441055298, Accuracy: 36.91679382324219, Val Loss: 1.653226375579834, Val Accuracy: 43.5
Iteration 600, Epoch 1, Loss: 1.8569049835205078, Accuracy: 37.840576171875, Val Loss: 1.713360071182251, Val Accuracy: 42.29999923706055
Iteration 700, Epoch 1, Loss: 1.8294674158096313, Accur

In [27]:
# Same model as above, trained through the built-in Keras compile/fit/evaluate API.
model = model_init_fn()
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.sparse_categorical_accuracy],
)
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=1,
    validation_data=(X_val, y_val),
)
# Last expression of the cell, so the notebook displays its return value.
model.evaluate(x=X_test, y=y_test)



[1.6083589792251587, 0.4442000091075897]

In [28]:
def model_init_fn():
    """Build the three-layer ConvNet with tf.keras.Sequential.

    Layer stack: 5x5 conv (32 filters, ReLU) -> 3x3 conv (16 filters, ReLU)
    -> flatten -> dense (10 units) -> softmax. Both convolutions use stride 1
    and 'same' padding, and every kernel is initialised with
    VarianceScaling(scale=2.0).

    Returns:
        An uncompiled tf.keras.Sequential model declared for (32, 32, 3) inputs.
    """
    ############################################################################
    # TODO: Construct a three-layer ConvNet using tf.keras.Sequential.         #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    input_shape = (32, 32, 3)
    channel_1, channel_2, num_classes = 32, 16, 10
    initializer = tf.initializers.VarianceScaling(scale=2.0)

    # Settings shared by both convolutions; only filter count and kernel size differ.
    conv_kwargs = dict(strides=(1, 1), activation='relu', padding='same',
                       kernel_initializer=initializer)
    layers = [
        tf.keras.layers.InputLayer(input_shape=input_shape),
        tf.keras.layers.Conv2D(filters=channel_1, kernel_size=(5, 5), **conv_kwargs),
        tf.keras.layers.Conv2D(filters=channel_2, kernel_size=(3, 3), **conv_kwargs),
        tf.keras.layers.Flatten(),
        # The Dense layer emits raw scores; the separate Softmax layer turns
        # them into class probabilities.
        tf.keras.layers.Dense(num_classes, kernel_initializer=initializer),
        tf.keras.layers.Softmax(),
    ]
    model = tf.keras.Sequential(layers)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                            END OF YOUR CODE                              #
    ############################################################################
    return model

# Step size read by optimizer_init_fn below.
learning_rate = 5e-4
def optimizer_init_fn():
    """Return SGD with Nesterov momentum (0.9) at the notebook-level `learning_rate`."""
    ############################################################################
    # TODO: Complete the implementation of model_fn.                           #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate,
                                        momentum=0.9, nesterov=True)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

# Run the custom training loop on the three-layer ConvNet. train_part34 is
# defined outside this excerpt; presumably it calls both factories itself —
# confirm against its definition.
train_part34(model_init_fn, optimizer_init_fn)

Iteration 0, Epoch 1, Loss: 2.823869228363037, Accuracy: 12.5, Val Loss: 2.6458985805511475, Val Accuracy: 10.5
Iteration 100, Epoch 1, Loss: 1.9743764400482178, Accuracy: 30.476484298706055, Val Loss: 1.7752528190612793, Val Accuracy: 39.39999771118164
Iteration 200, Epoch 1, Loss: 1.8500899076461792, Accuracy: 35.059078216552734, Val Loss: 1.6507887840270996, Val Accuracy: 43.29999923706055
Iteration 300, Epoch 1, Loss: 1.7830449342727661, Accuracy: 37.32350540161133, Val Loss: 1.608421802520752, Val Accuracy: 44.80000305175781
Iteration 400, Epoch 1, Loss: 1.7239418029785156, Accuracy: 39.32746505737305, Val Loss: 1.5467864274978638, Val Accuracy: 46.20000076293945
Iteration 500, Epoch 1, Loss: 1.6825448274612427, Accuracy: 40.69673156738281, Val Loss: 1.5109102725982666, Val Accuracy: 47.400001525878906
Iteration 600, Epoch 1, Loss: 1.654444694519043, Accuracy: 41.71173095703125, Val Loss: 1.4871400594711304, Val Accuracy: 49.70000076293945
Iteration 700, Epoch 1, Loss: 1.629434466

In [29]:
# Train the three-layer ConvNet with Keras' built-in compile / fit / evaluate loop.
# NOTE(review): optimizer='sgd' is a string identifier, so Keras builds an SGD
# optimizer with its own default settings; the learning_rate and Nesterov
# momentum configured in optimizer_init_fn above are not used in this cell —
# confirm that is intended.
model = model_init_fn()
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.sparse_categorical_accuracy],
)
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=1,
    validation_data=(X_val, y_val),
)
# Kept as the last expression so the notebook displays the returned test metrics.
model.evaluate(x=X_test, y=y_test)



[1.4212288856506348, 0.49129998683929443]

In [30]:
def two_layer_fc_functional(input_shape, hidden_size, num_classes):
    """Two-layer fully-connected classifier built with the Keras functional API.

    Args:
        input_shape: shape of one example, forwarded to tf.keras.Input.
        hidden_size: number of units in the hidden ReLU layer.
        num_classes: number of units in the softmax output layer.

    Returns:
        A tf.keras.Model wiring input -> flatten -> dense/ReLU -> dense/softmax.
    """
    weight_init = tf.initializers.VarianceScaling(scale=2.0)

    # Wire the graph symbolically, one layer application per line.
    net_input = tf.keras.Input(shape=input_shape)
    flat = tf.keras.layers.Flatten()(net_input)
    hidden = tf.keras.layers.Dense(
        hidden_size, activation='relu', kernel_initializer=weight_init
    )(flat)
    class_probs = tf.keras.layers.Dense(
        num_classes, activation='softmax', kernel_initializer=weight_init
    )(hidden)

    # The functional model is fully described by its input and output tensors.
    return tf.keras.Model(inputs=net_input, outputs=class_probs)

def test_two_layer_fc_functional():
    """Smoke test: push a batch of zeros through the functional model and print the output shape."""
    input_size, hidden_size, num_classes = 50, 42, 10
    batch_size = 64

    dummy_batch = tf.zeros((batch_size, input_size))
    net = two_layer_fc_functional((input_size,), hidden_size, num_classes)

    # Only the forward pass is placed on the configured device.
    with tf.device(device):
        scores = net(dummy_batch)
        print(scores.shape)


test_two_layer_fc_functional()

(64, 10)


In [31]:
# Notebook-level hyperparameters read by model_init_fn / optimizer_init_fn below.
input_shape = (32, 32, 3)  # forwarded to tf.keras.Input(shape=...) by two_layer_fc_functional
hidden_size, num_classes = 4000, 10  # hidden-layer width and number of output classes
learning_rate = 1e-2  # SGD step size used by optimizer_init_fn

def model_init_fn():
    """Create the functional-API two-layer model from the notebook-level hyperparameters."""
    net = two_layer_fc_functional(input_shape, hidden_size, num_classes)
    return net

def optimizer_init_fn():
    """Create a plain SGD optimizer that uses the notebook-level `learning_rate`."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd

# Run the custom training loop on the functional-API model. train_part34 is
# defined outside this excerpt; presumably it calls both factories itself —
# confirm against its definition.
train_part34(model_init_fn, optimizer_init_fn)


Iteration 0, Epoch 1, Loss: 3.1066348552703857, Accuracy: 9.375, Val Loss: 2.8506009578704834, Val Accuracy: 14.200000762939453
Iteration 100, Epoch 1, Loss: 2.2456891536712646, Accuracy: 28.29517364501953, Val Loss: 1.8707401752471924, Val Accuracy: 39.099998474121094
Iteration 200, Epoch 1, Loss: 2.0742945671081543, Accuracy: 32.26057434082031, Val Loss: 1.8533170223236084, Val Accuracy: 40.5
Iteration 300, Epoch 1, Loss: 1.9978175163269043, Accuracy: 34.08949279785156, Val Loss: 1.8381661176681519, Val Accuracy: 38.80000305175781
Iteration 400, Epoch 1, Loss: 1.9315049648284912, Accuracy: 35.879051208496094, Val Loss: 1.721449851989746, Val Accuracy: 42.79999923706055
Iteration 500, Epoch 1, Loss: 1.8863307237625122, Accuracy: 36.94485855102539, Val Loss: 1.6466484069824219, Val Accuracy: 43.599998474121094
Iteration 600, Epoch 1, Loss: 1.8551926612854004, Accuracy: 37.89517593383789, Val Loss: 1.673884391784668, Val Accuracy: 43.20000076293945
Iteration 700, Epoch 1, Loss: 1.829090

In [32]:
# Inspect the symbolic tensor that tf.keras.Input creates for a (32, 32, 3) shape.
input_shape = (32, 32, 3)

inputs = tf.keras.Input(shape=input_shape)
# Bare expression so the notebook displays the tensor's repr as the cell output.
inputs

<KerasTensor: shape=(None, 32, 32, 3) dtype=float32 (created by layer 'input_5')>

In [None]:
class CustomConvNet(tf.keras.Model):
    """Three-stage convolutional classifier built by subclassing tf.keras.Model.

    Each stage is: zero-pad -> Conv2D (ReLU, 'valid' padding, stride 1)
    -> BatchNormalization -> Dropout -> 2x2 MaxPool. The last feature map is
    global-average-pooled and fed to a Dense layer followed by Softmax.

    Args:
        channel_1: number of filters in the first (3x3) convolution.
        channel_2: number of filters in the second (7x7) convolution.
        channel_3: number of filters in the third (5x5) convolution.
        num_classes: number of units in the final Dense layer.
    """

    def __init__(self, channel_1, channel_2, channel_3, num_classes):
        super().__init__()
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        initializer = tf.initializers.VarianceScaling(scale=2.0)

        # Stage 1: 3x3 convolution, light dropout.
        self.conv1 = tf.keras.layers.Conv2D(filters=channel_1, kernel_size=(3, 3), strides=(1, 1),
                                            activation='relu', padding='valid',
                                            kernel_initializer=initializer)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.drop_out1 = tf.keras.layers.Dropout(rate=0.2)
        self.max_pool1 = tf.keras.layers.MaxPool2D(pool_size=(2, 2))

        # Stage 2: 7x7 convolution, light dropout.
        self.conv2 = tf.keras.layers.Conv2D(filters=channel_2, kernel_size=(7, 7), strides=(1, 1),
                                            activation='relu', padding='valid',
                                            kernel_initializer=initializer)
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.drop_out2 = tf.keras.layers.Dropout(rate=0.2)
        self.max_pool2 = tf.keras.layers.MaxPool2D(pool_size=(2, 2))

        # Stage 3: 5x5 convolution, heavier dropout before the classifier head.
        self.conv3 = tf.keras.layers.Conv2D(filters=channel_3, kernel_size=(5, 5), strides=(1, 1),
                                            activation='relu', padding='valid',
                                            kernel_initializer=initializer)
        self.bn3 = tf.keras.layers.BatchNormalization()
        self.max_pool3 = tf.keras.layers.MaxPool2D(pool_size=(2, 2))
        self.drop_out3 = tf.keras.layers.Dropout(rate=0.5)

        # Classifier head.
        self.fc = tf.keras.layers.Dense(num_classes, kernel_initializer=initializer)
        self.global_avg = tf.keras.layers.GlobalAveragePooling2D()
        self.softmax = tf.keras.layers.Softmax()

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################

    def call(self, input_tensor, training=False):
        """Run the forward pass.

        Args:
            input_tensor: 4-D batch; axes 1 and 2 are zero-padded before each
                convolution (spatial axes under Conv2D's default channels-last layout).
            training: forwarded explicitly to every BatchNormalization and
                Dropout layer so they switch between training and inference
                behaviour together with the model.

        Returns:
            Softmax output of the final Dense layer, one row per example.
        """
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # Spatial size for a 32x32 input:
        # 32 -pad2-> 36 -conv3-> 34 -pool-> 17 -pad1-> 19 -conv7-> 13 -pool-> 6
        #    -pad1-> 8 -conv5-> 4 -pool-> 2
        x = tf.pad(input_tensor, [[0, 0], [2, 2], [2, 2], [0, 0]], 'CONSTANT')
        x = self.conv1(x)
        x = self.bn1(x, training=training)
        x = self.drop_out1(x, training=training)
        x = self.max_pool1(x)

        x = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]], 'CONSTANT')
        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = self.drop_out2(x, training=training)
        x = self.max_pool2(x)

        x = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]], 'CONSTANT')
        x = self.conv3(x)
        x = self.bn3(x, training=training)
        x = self.drop_out3(x, training=training)
        x = self.max_pool3(x)

        # Global average pooling collapses the spatial axes, so no Flatten is needed.
        x = self.global_avg(x)
        x = self.fc(x)
        x = self.softmax(x)

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################
        return x


# Training-run settings for CustomConvNet.
print_every = 700  # replaces the value of 100 assigned near the top of the notebook
num_epochs = 10  # forwarded to train_part34 below

# Filter counts of the three convolutions and the number of output classes,
# read by model_init_fn below.
channel_1, channel_2, channel_3, num_classes = 256, 112, 64, 10

def model_init_fn():
    """Instantiate CustomConvNet with the notebook-level channel widths and class count."""
    return CustomConvNet(channel_1, channel_2, channel_3, num_classes)

def optimizer_init_fn():
    """Return an Adam optimizer with a fixed step size of 1e-3."""
    # Local value: the notebook-level learning_rate is neither read nor changed here.
    adam_step = 1e-3
    adam = tf.keras.optimizers.Adam(adam_step)
    return adam

# Train CustomConvNet for num_epochs epochs. is_training=True is forwarded to
# train_part34 (defined outside this excerpt); presumably it is passed on to the
# model's `training` argument so dropout and batch norm run in training mode —
# confirm against train_part34.
train_part34(model_init_fn, optimizer_init_fn, num_epochs=num_epochs, is_training=True)

Iteration 0, Epoch 1, Loss: 4.229311943054199, Accuracy: 14.0625, Val Loss: 7.876084327697754, Val Accuracy: 10.899999618530273
Iteration 700, Epoch 1, Loss: 1.3885812759399414, Accuracy: 51.627140045166016, Val Loss: 1.1855409145355225, Val Accuracy: 59.500003814697266
Iteration 1400, Epoch 2, Loss: 0.9259873032569885, Accuracy: 67.67716217041016, Val Loss: 0.9837809205055237, Val Accuracy: 66.9000015258789
Iteration 2100, Epoch 3, Loss: 0.7644560933113098, Accuracy: 73.05305480957031, Val Loss: 0.844839334487915, Val Accuracy: 71.69999694824219
Iteration 2800, Epoch 4, Loss: 0.6599968671798706, Accuracy: 76.80480194091797, Val Loss: 0.9524750113487244, Val Accuracy: 68.19999694824219
Iteration 3500, Epoch 5, Loss: 0.5857861638069153, Accuracy: 79.48011779785156, Val Loss: 0.8167557120323181, Val Accuracy: 73.5999984741211
Iteration 4200, Epoch 6, Loss: 0.5185089707374573, Accuracy: 81.69221496582031, Val Loss: 0.8618490099906921, Val Accuracy: 73.29999542236328
Iteration 4900, Epoch 