In [1]:
import tensorflow as tf
import numpy as np
from time import time
from datetime import datetime
import math
import matplotlib.pyplot as plt
from PIL import Image

USE_GPU = False

  from ._conv import register_converters as _register_converters


In [2]:
import os

In [3]:
os.getcwd()

'E:\\courses\\stat231A\\homework1'

In [4]:
log = lambda *args: print(datetime.now().strftime('%H:%M:%S'), ':', *args)

In [5]:
def load_cifar10(num_training=49000, num_validation=1000, num_test=10000):
    cifar10 = tf.keras.datasets.cifar10.load_data()
    (X_train, y_train), (X_test, y_test) = cifar10
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int32).flatten()
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int32).flatten()

    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    mean_pixel = X_train.mean(axis=(0, 1, 2), keepdims=True)
    std_pixel = X_train.std(axis=(0, 1, 2), keepdims=True)
    X_train = (X_train - mean_pixel) / std_pixel
    X_val = (X_val - mean_pixel) / std_pixel
    X_test = (X_test - mean_pixel) / std_pixel

    return X_train, y_train, X_val, y_val, X_test, y_test, mean_pixel, std_pixel


In [6]:
class Dataset(object):
    def __init__(self, X, y, batch_size, shuffle=False):
        assert X.shape[0] == y.shape[0]
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        N, B = self.X.shape[0], self.batch_size
        idxs = np.arange(N)
        if self.shuffle:
            np.random.shuffle(idxs)
        return iter((self.X[i:i + B], self.y[i:i + B]) for i in range(0, N, B))


In [7]:
X_train, y_train, X_val, y_val, X_test, y_test, mean_pixel, std_pixel = load_cifar10()
train_dset = Dataset(X_train, y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=64, shuffle=False)
test_dset = Dataset(X_test, y_test, batch_size=64)


In [8]:
get_X_train_sample = lambda: next(iter(train_dset))[0][7] * std_pixel + mean_pixel

In [9]:
def select_device(use_gpu=True):
    from tensorflow.python.client import device_lib
    log(device_lib.list_local_devices())
    device = '/device:GPU:0' if use_gpu else '/CPU:0'
    log('Using device: ', device)
    return device

device = select_device(use_gpu=False)

12:32:32 : [name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 17852544518492867942
]
12:32:32 : Using device:  /CPU:0


In [2]:
def flatten(x):
    """
    Input:
    - TensorFlow Tensor of shape (N, D1, ..., DM)

    Output:
    - TensorFlow Tensor of shape (N, D1 * ... * DM)
    """
    ############################################################################
    # TODO: (1.a) Reshape tensor x into shape (N, D1 * ... * DM)               #
    ############################################################################

    input_shape = x.get_shape()
    dim = 1
    for d in input_shape.as_list()[1:]:
        dim *= d if not d == None else 1
    ############################################################################
    #                              END OF YOUR CODE                            #
    ############################################################################
    return tf.reshape(x, [-1, dim])

In [3]:
def kaiming_normal(shape):
    """
    He et al, *Delving Deep into Rectifiers: Surpassing Human-Level Performance on
    ImageNet Classification, ICCV 2015, https://arxiv.org/abs/1502.01852
    """
    if len(shape) == 2:
        fan_in, fan_out = shape[0], shape[1]
    elif len(shape) == 4:
        fan_in, fan_out = np.prod(shape[:3]), shape[3]
    return tf.random_normal(shape) * np.sqrt(2.0 / fan_in)

In [4]:
def convnet_init():
    """
    Initialize the weights of a Three-Layer ConvNet, for use with the
    three_layer_convnet function defined above.
    """

    conv_w1 = tf.Variable(kaiming_normal([5, 5, 3, 32]))
    conv_b1 = tf.Variable(tf.zeros(32,))

    conv_w2 = tf.Variable(kaiming_normal([5, 5, 32, 32]))
    conv_b2 = tf.Variable(tf.zeros(32,))

    conv_w3 = tf.Variable(kaiming_normal([5, 5, 32, 64]))
    conv_b3 = tf.Variable(tf.zeros(64,))

    ############################################################################
    # TODO: (1.a), (2.a) Initialize the remaining parameters.                  #
    ############################################################################

    conv_w4 = tf.Variable(kaiming_normal([4, 4, 64, 64]))
    conv_b4 = tf.Variable(tf.zeros(64,))
    conv_w5 =tf.Variable(kaiming_normal([1, 1, 64, 10]))
    conv_b5 = tf.Variable(tf.zeros(10,))

    ############################################################################
    #                             END OF YOUR CODE                             #
    ############################################################################

    params = [conv_w1, conv_b1, conv_w2, conv_b2, conv_w3, conv_b3, conv_w4, conv_b4, conv_w5, conv_b5]

    return params


In [13]:
def convnet_init_block1_only():
    """
    Initialize the weights of a Three-Layer ConvNet, for use with the
    three_layer_convnet function defined above.
    """

    conv_w1 = tf.Variable(kaiming_normal([5, 5, 3, 32]))
    conv_b1 = tf.Variable(tf.zeros(32,))

    conv_w5 =tf.Variable(kaiming_normal([1, 1, 32, 10]))
    conv_b5 = tf.Variable(tf.zeros(10,))

    params = [conv_w1, conv_b1, conv_w5, conv_b5]

    return params


In [14]:
def convnet_init_block1_and_block2():
    """
    Initialize the weights of a Three-Layer ConvNet, for use with the
    three_layer_convnet function defined above.
    """

    conv_w1 = tf.Variable(kaiming_normal([5, 5, 3, 32]))
    conv_b1 = tf.Variable(tf.zeros(32,))
    
    conv_w2 = tf.Variable(kaiming_normal([5, 5, 32, 32]))
    conv_b2 = tf.Variable(tf.zeros(32,))

    conv_w5 =tf.Variable(kaiming_normal([1, 1, 32, 10]))
    conv_b5 = tf.Variable(tf.zeros(10,))
    
    params = [conv_w1, conv_b1, conv_w2, conv_b2, conv_w5, conv_b5]

   
    return params


In [15]:
def convnet_init_block1_block2_and_block3():
    """
    Initialize the weights of a Three-Layer ConvNet, for use with the
    three_layer_convnet function defined above.
    """

    conv_w1 = tf.Variable(kaiming_normal([5, 5, 3, 32]))
    conv_b1 = tf.Variable(tf.zeros(32,))
    
    conv_w2 = tf.Variable(kaiming_normal([5, 5, 32, 32]))
    conv_b2 = tf.Variable(tf.zeros(32,))
    
    conv_w3 = tf.Variable(kaiming_normal([5, 5, 32, 64]))
    conv_b3 = tf.Variable(tf.zeros(64,))

    conv_w5 =tf.Variable(kaiming_normal([1, 1, 64, 10]))
    conv_b5 = tf.Variable(tf.zeros(10,))
    
    params = [conv_w1, conv_b1, conv_w2, conv_b2, conv_w3, conv_b3, conv_w5, conv_b5]

    return params


In [5]:
def convnet_forward(x, params):
    """
    A three-layer convolutional network.

    Args:
    - x: A TensorFlow Tensor of shape (N, H, W, 3) giving a minibatch of images
    - params: A list of TensorFlow Tensors giving the weights and biases for the network.
    """
    [conv_w1, conv_b1, conv_w2, conv_b2, conv_w3, conv_b3, conv_w4, conv_b4, conv_w5, conv_b5] = params

    # block 1
    x1_1_pad = tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='CONSTANT', constant_values=0)
    x1_2_conv = tf.nn.conv2d(x1_1_pad, conv_w1, [1, 1, 1, 1], padding='VALID') + conv_b1
    x1_3_pad = tf.pad(x1_2_conv, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', constant_values=0)
    x1_4_pool = tf.nn.max_pool(x1_3_pad, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    x1_5_relu = tf.nn.relu(x1_4_pool)

    # block 2
    x2_1_pad = tf.pad(x1_5_relu, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='CONSTANT', constant_values=0)
    x2_2_conv = tf.nn.conv2d(x2_1_pad, conv_w2, [1, 1, 1, 1], padding='VALID') + conv_b2
    x2_3_relu = tf.nn.relu(x2_2_conv)
    x2_4_pad = tf.pad(x2_3_relu, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', constant_values=0)
    x2_5_pool = tf.nn.avg_pool(x2_4_pad, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')

    # block 3
    x3_1_pad = tf.pad(x2_5_pool, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='CONSTANT', constant_values=0)
    x3_2_conv = tf.nn.conv2d(x3_1_pad, conv_w3, [1, 1, 1, 1], padding='VALID') + conv_b3
    x3_3_relu = tf.nn.relu(x3_2_conv)
    x3_4_pad = tf.pad(x3_3_relu, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', constant_values=0)
    x3_5_pool = tf.nn.avg_pool(x3_4_pad, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')

    ############################################################################
    # TODO: (1.a), (2.a) Implement the remaining forward pass.                 #
    ############################################################################
    
    # block 4
    x4_1_conv = tf.nn.conv2d(x3_5_pool, conv_w4, [1, 1, 1, 1], padding="VALID") + conv_b4
    x4_2_relu = tf.nn.relu(x4_1_conv)
    
    #block 5
    x5_1_conv = tf.nn.conv2d(x4_2_relu, conv_w5, [1, 1, 1, 1], padding="VALID") + conv_b5
    
    ############################################################################
    #                              END OF YOUR CODE                            #
    ############################################################################
    print(x3_5_pool)
    logits = flatten(x5_1_conv)

    return logits

In [17]:
def convnet_forward_block1_only(x, params):
    """
    A three-layer convolutional network.

    Args:
    - x: A TensorFlow Tensor of shape (N, H, W, 3) giving a minibatch of images
    - params: A list of TensorFlow Tensors giving the weights and biases for the network.
    """
    [conv_w1, conv_b1, conv_w5, conv_b5] = params

    # block 1
    x1_1_pad = tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='CONSTANT', constant_values=0)
    x1_2_conv = tf.nn.conv2d(x1_1_pad, conv_w1, [1, 1, 1, 1], padding='VALID') + conv_b1
    x1_3_pad = tf.pad(x1_2_conv, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', constant_values=0)
    x1_4_pool = tf.nn.max_pool(x1_3_pad, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    x1_5_relu = tf.nn.relu(x1_4_pool)
    
    #block 5
    x5_1_conv = tf.nn.conv2d(x1_5_relu, conv_w5, [1, 1, 1, 1], padding="VALID") + conv_b5

    logits = flatten(x5_1_conv)

    return logits

In [18]:
def convnet_forward_block1_and_block2(x, params):
    """
    A three-layer convolutional network.

    Args:
    - x: A TensorFlow Tensor of shape (N, H, W, 3) giving a minibatch of images
    - params: A list of TensorFlow Tensors giving the weights and biases for the network.
    """
    [conv_w1, conv_b1, conv_w2, conv_b2, conv_w5, conv_b5] = params


    # block 1
    x1_1_pad = tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='CONSTANT', constant_values=0)
    x1_2_conv = tf.nn.conv2d(x1_1_pad, conv_w1, [1, 1, 1, 1], padding='VALID') + conv_b1
    x1_3_pad = tf.pad(x1_2_conv, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', constant_values=0)
    x1_4_pool = tf.nn.max_pool(x1_3_pad, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    x1_5_relu = tf.nn.relu(x1_4_pool)
    
    # block 2
    x2_1_pad = tf.pad(x1_5_relu, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='CONSTANT', constant_values=0)
    x2_2_conv = tf.nn.conv2d(x2_1_pad, conv_w2, [1, 1, 1, 1], padding='VALID') + conv_b2
    x2_3_relu = tf.nn.relu(x2_2_conv)
    x2_4_pad = tf.pad(x2_3_relu, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', constant_values=0)
    x2_5_pool = tf.nn.avg_pool(x2_4_pad, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    
    #block 5
    x5_1_conv = tf.nn.conv2d(x2_5_pool, conv_w5, [1, 1, 1, 1], padding="VALID") + conv_b5
    
    logits = flatten(x5_1_conv)

    return logits

In [19]:
def convnet_forward_block1_block2_and_block3(x, params):
    """
    A three-layer convolutional network.

    Args:
    - x: A TensorFlow Tensor of shape (N, H, W, 3) giving a minibatch of images
    - params: A list of TensorFlow Tensors giving the weights and biases for the network.
    """
    [conv_w1, conv_b1, conv_w2, conv_b2, conv_w3, conv_b3, conv_w5, conv_b5] = params

    # block 1
    x1_1_pad = tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='CONSTANT', constant_values=0)
    x1_2_conv = tf.nn.conv2d(x1_1_pad, conv_w1, [1, 1, 1, 1], padding='VALID') + conv_b1
    x1_3_pad = tf.pad(x1_2_conv, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', constant_values=0)
    x1_4_pool = tf.nn.max_pool(x1_3_pad, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    x1_5_relu = tf.nn.relu(x1_4_pool)

    # block 2
    x2_1_pad = tf.pad(x1_5_relu, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='CONSTANT', constant_values=0)
    x2_2_conv = tf.nn.conv2d(x2_1_pad, conv_w2, [1, 1, 1, 1], padding='VALID') + conv_b2
    x2_3_relu = tf.nn.relu(x2_2_conv)
    x2_4_pad = tf.pad(x2_3_relu, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', constant_values=0)
    x2_5_pool = tf.nn.avg_pool(x2_4_pad, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')

    # block 3
    x3_1_pad = tf.pad(x2_5_pool, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='CONSTANT', constant_values=0)
    x3_2_conv = tf.nn.conv2d(x3_1_pad, conv_w3, [1, 1, 1, 1], padding='VALID') + conv_b3
    x3_3_relu = tf.nn.relu(x3_2_conv)
    x3_4_pad = tf.pad(x3_3_relu, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='CONSTANT', constant_values=0)
    x3_5_pool = tf.nn.avg_pool(x3_4_pad, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    
    #block 5
    x5_1_conv = tf.nn.conv2d(x3_5_relu, conv_w5, [1, 1, 1, 1], padding="VALID") + conv_b5
    
    logits = flatten(x5_1_conv)

    return logits

In [20]:
def three_layer_convnet_test():
    tf.reset_default_graph()

    with tf.device(device):
        x = tf.placeholder(tf.float32)

        # block 1
        conv_w1 = tf.zeros([5, 5, 3, 32])
        conv_b1 = tf.zeros(32)

        # block 2
        conv_w2 = tf.zeros([5, 5, 32, 32])
        conv_b2 = tf.zeros(32)

        # block 3
        conv_w3 = tf.zeros([5, 5, 32, 64])
        conv_b3 = tf.zeros(64)

        ############################################################################
        # TODO: (1.a), (2.a) Initialize the parameters.                            #
        ############################################################################

        conv_w4 = tf.Variable(kaiming_normal([4, 4, 64, 64]))
        conv_b4 = tf.Variable(tf.zeros(64,))
        conv_w5 =tf.Variable(kaiming_normal([1, 1, 64, 10]))
        conv_b5 = tf.Variable(tf.zeros(10,))

        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        params = [conv_w1, conv_b1, conv_w2, conv_b2, conv_w3, conv_b3, conv_w4, conv_b4, conv_w5, conv_b5]
        logits = convnet_forward(x, params)

    # Inputs to convolutional layers are 4-dimensional arrays with shape [batch_size, height, width, channels]
    x_np = np.zeros((64, 32, 32, 3))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        logits_np = sess.run(logits, feed_dict={x: x_np})
        log('logits_np has shape', format(logits_np.shape))


with tf.device('/cpu:0'):
    three_layer_convnet_test()

Tensor("AvgPool_1:0", shape=(?, ?, ?, 64), dtype=float32, device=/device:CPU:0)
12:32:35 : logits_np has shape (64, 10)


In [21]:
def training_step(logits, y, params, learning_rate):
    """
    Set up the part of the computational graph which makes a training step.

    Args:
    - logits: TensorFlow Tensor of shape (N, C) giving classification scores for
      the model.
    - y: TensorFlow Tensor of shape (N,) giving ground-truth labels for scores;
      y[i] == c means that c is the correct class for scores[i].
    - params: List of TensorFlow Tensors giving the weights of the model
    - learning_rate: Python scalar giving the learning rate to use for gradient
      descent step.

    Returns:
    - loss: A TensorFlow Tensor of shape () (scalar) giving the loss for this
      batch of data; evaluating the loss also performs a gradient descent step
      on params (see above).
    """
    # First compute the loss; the first line gives losses for each example in
    # the mini-batch, and the second averages the losses across the batch
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(losses)

    # Compute the gradient of the loss with respect to each parameter of the the
    # network. This is a very magical function call: TensorFlow internally
    # traverses the computational graph starting at loss backward to each element
    # of params, and uses back-propagation to figure out how to compute gradients;
    # it then adds new operations to the computational graph which compute the
    # requested gradients, and returns a list of TensorFlow Tensors that will
    # contain the requested gradients when evaluated.
    grad_params = tf.gradients(loss, params)

    # Make a gradient descent step on all of the model parameters.
    new_weights = []
    for w, grad_w in zip(params, grad_params):
        new_w = tf.assign_sub(w, learning_rate * grad_w)
        new_weights.append(new_w)

    # Insert a control dependency so that evaluating the loss causes a weight
    # update to happen.
    with tf.control_dependencies(new_weights):
        return tf.identity(loss)

In [22]:
def train(model_fn, init_fn, learning_rate, epochs, print_every=100):
    """
    Train a model on CIFAR-10.

    Args:
    - model_fn: A Python function that performs the forward pass of the model
      using TensorFlow; it should have the following signature:
      scores = model_fn(x, params) where x is a TensorFlow Tensor giving a
      minibatch of image data, params is a list of TensorFlow Tensors holding
      the model weights, and scores is a TensorFlow Tensor of shape (N, C)
      giving scores for all elements of x.
    - init_fn: A Python function that initializes the parameters of the model.
      It should have the signature params = init_fn() where params is a list
      of TensorFlow Tensors holding the (randomly initialized) weights of the
      model.
    - learning_rate: Python float giving the learning rate to use for SGD.
    """
    # First clear the default graph
    tf.reset_default_graph()
    is_training = tf.placeholder(tf.bool, name='is_training')
    # Set up the computational graph for performing forward and backward passes,
    # and weight updates.
    with tf.device(device):
        # Set up placeholders for the data and labels
        x = tf.placeholder(tf.float32, [None, 32, 32, 3])
        y = tf.placeholder(tf.int32, [None])
        params = init_fn()  # Initialize the model parameters
        scores = model_fn(x, params)  # Forward pass of the model
        loss = training_step(scores, y, params, learning_rate)

    train_losses = []
    test_accuracies = []
    
    current_best_acc = 0

    # Now we actually run the graph many times using the training data
    
    #Saver
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialize variables that will live in the graph
        sess.run(tf.global_variables_initializer())
        for epoch in range(epochs):
            log('epoch {:>4d}/{:>4d}'.format(epoch, epochs))
            epoch_time = time()
            for t, (x_np, y_np) in enumerate(train_dset):
                # Run the graph on a batch of training data; recall that asking
                # TensorFlow to evaluate loss will cause an SGD step to happen.
                feed_dict = {x: x_np, y: y_np}
                loss_np = sess.run(loss, feed_dict=feed_dict)

                # Periodically print the loss and check accuracy on the val set
                if t % print_every == 0:
                    num_correct, num_samples, acc = get_accuracy(sess, val_dset, x, scores, is_training)
                    log('   iteration = {:>4d}, loss = {:>8.4f}, accuracy = {:>8.2f}%'.format(t, loss_np, acc))
                    if  acc > current_best_acc:
                        save_path = saver.save(sess, "E:\\courses\\stat231A\\homework1\\models\\best_model_block1_and_block2.ckpt")
                        print("Model saved in path: %s" % save_path)
                        current_best_acc = acc
                        log("Current_Best_Acc: ", acc)

            train_losses.append(loss_np)
            test_accuracies.append(acc)

            log('epoch {:>4d} took {:>.2f}s'.format(epoch, time()-epoch_time))
        
        log("Finished training")
        save_path = saver.save(sess, "E:\\courses\\stat231A\\homework1\\models\\final_model_block1_and_block2.ckpt")
        return params, sess.run(params), train_losses, test_accuracies

In [23]:
def get_accuracy(sess, dset, x, logits, is_training=None):
    """
    Check accuracy on a classification model.

    Args:
    - sess: A TensorFlow Session that will be used to run the graph
    - dset: A Dataset object on which to check accuracy
    - x: A TensorFlow placeholder Tensor where input images should be fed
    - scores: A TensorFlow Tensor representing the scores output from the
      model; this is the Tensor we will ask TensorFlow to evaluate.

    Returns: Nothing, but prints the accuracy of the model
    """
    num_correct, num_samples = 0, 0
    for x_batch, y_batch in dset:
        feed_dict = {x: x_batch, is_training: 0}
        scores_np = sess.run(logits, feed_dict=feed_dict)
        y_pred = scores_np.argmax(axis=1)
        num_samples += x_batch.shape[0]
        num_correct += (y_pred == y_batch).sum()
    acc = float(num_correct) / num_samples
    return num_correct, num_samples, 100 * acc

In [24]:
learning_rate = 0.001
epochs = 100

In [25]:
params, params_val, train_losses, test_accuracies = train(convnet_forward_block1_and_block2, convnet_init_block1_and_block2, learning_rate, epochs=epochs)

12:32:37 : epoch    0/ 100
12:32:38 :    iteration =    0, loss =   7.5093, accuracy =     0.00%
12:32:50 :    iteration =  100, loss =   5.3697, accuracy =     1.20%
Model saved in path: E:\courses\stat231A\homework1\models\best_model_block1_and_block2.ckpt
12:32:50 : Current_Best_Acc:  1.2
12:33:01 :    iteration =  200, loss =   4.3212, accuracy =    13.80%
Model saved in path: E:\courses\stat231A\homework1\models\best_model_block1_and_block2.ckpt
12:33:01 : Current_Best_Acc:  13.8
12:33:14 :    iteration =  300, loss =   3.7329, accuracy =    13.60%
12:33:29 :    iteration =  400, loss =   3.1941, accuracy =    14.40%
Model saved in path: E:\courses\stat231A\homework1\models\best_model_block1_and_block2.ckpt
12:33:29 : Current_Best_Acc:  14.399999999999999
12:33:42 :    iteration =  500, loss =   3.1082, accuracy =    13.90%
12:33:54 :    iteration =  600, loss =   2.7606, accuracy =    15.10%
Model saved in path: E:\courses\stat231A\homework1\models\best_model_block1_and_block2.ck

12:45:04 :    iteration =  200, loss =   2.0358, accuracy =    23.30%
Model saved in path: E:\courses\stat231A\homework1\models\best_model_block1_and_block2.ckpt
12:45:04 : Current_Best_Acc:  23.3
12:45:16 :    iteration =  300, loss =   2.0764, accuracy =    23.30%
12:45:28 :    iteration =  400, loss =   2.1619, accuracy =    22.90%
12:45:39 :    iteration =  500, loss =   2.1048, accuracy =    23.10%
12:45:51 :    iteration =  600, loss =   2.1105, accuracy =    22.70%
12:46:03 :    iteration =  700, loss =   2.2026, accuracy =    22.10%
12:46:10 : epoch    8 took 89.87s
12:46:10 : epoch    9/ 100
12:46:11 :    iteration =    0, loss =   2.1255, accuracy =    23.40%
Model saved in path: E:\courses\stat231A\homework1\models\best_model_block1_and_block2.ckpt
12:46:11 : Current_Best_Acc:  23.400000000000002
12:46:23 :    iteration =  100, loss =   2.1555, accuracy =    22.90%
12:46:35 :    iteration =  200, loss =   2.0213, accuracy =    23.70%
Model saved in path: E:\courses\stat231A\

13:01:07 : epoch   18 took 89.41s
13:01:07 : epoch   19/ 100
13:01:07 :    iteration =    0, loss =   2.0715, accuracy =    25.70%
13:01:19 :    iteration =  100, loss =   2.0905, accuracy =    25.50%
13:01:31 :    iteration =  200, loss =   1.9589, accuracy =    26.40%
13:01:42 :    iteration =  300, loss =   2.0319, accuracy =    26.00%
13:01:54 :    iteration =  400, loss =   2.0972, accuracy =    26.00%
13:02:05 :    iteration =  500, loss =   2.0755, accuracy =    25.20%
13:02:17 :    iteration =  600, loss =   2.0431, accuracy =    26.00%
13:02:29 :    iteration =  700, loss =   2.1636, accuracy =    25.40%
13:02:36 : epoch   19 took 89.25s
13:02:36 : epoch   20/ 100
13:02:37 :    iteration =    0, loss =   2.0678, accuracy =    25.70%
13:02:48 :    iteration =  100, loss =   2.0865, accuracy =    25.80%
13:03:00 :    iteration =  200, loss =   1.9566, accuracy =    26.20%
13:03:12 :    iteration =  300, loss =   2.0306, accuracy =    25.70%
13:03:23 :    iteration =  400, loss =

13:20:32 : epoch   31 took 91.29s
13:20:32 : epoch   32/ 100
13:20:33 :    iteration =    0, loss =   2.0410, accuracy =    25.50%
13:20:45 :    iteration =  100, loss =   2.0551, accuracy =    26.20%
13:20:57 :    iteration =  200, loss =   1.9307, accuracy =    27.10%
13:21:09 :    iteration =  300, loss =   2.0172, accuracy =    25.40%
13:21:21 :    iteration =  400, loss =   2.0822, accuracy =    26.40%
13:21:33 :    iteration =  500, loss =   2.0765, accuracy =    24.40%
13:21:47 :    iteration =  600, loss =   2.0168, accuracy =    25.90%
13:21:58 :    iteration =  700, loss =   2.1427, accuracy =    26.60%
13:22:05 : epoch   32 took 92.97s
13:22:05 : epoch   33/ 100
13:22:06 :    iteration =    0, loss =   2.0392, accuracy =    25.40%
13:22:17 :    iteration =  100, loss =   2.0529, accuracy =    26.30%
13:22:29 :    iteration =  200, loss =   1.9295, accuracy =    27.40%
Model saved in path: E:\courses\stat231A\homework1\models\best_model_block1_and_block2.ckpt
13:22:29 : Curre

13:38:43 :    iteration =  400, loss =   2.0748, accuracy =    26.70%
13:38:54 :    iteration =  500, loss =   2.0716, accuracy =    26.10%
13:39:05 :    iteration =  600, loss =   1.9994, accuracy =    26.50%
13:39:16 :    iteration =  700, loss =   2.1292, accuracy =    26.90%
13:39:23 : epoch   44 took 86.17s
13:39:23 : epoch   45/ 100
13:39:24 :    iteration =    0, loss =   2.0210, accuracy =    26.10%
13:39:35 :    iteration =  100, loss =   2.0299, accuracy =    27.00%
13:39:47 :    iteration =  200, loss =   1.9156, accuracy =    27.80%
Model saved in path: E:\courses\stat231A\homework1\models\best_model_block1_and_block2.ckpt
13:39:47 : Current_Best_Acc:  27.800000000000004
13:39:58 :    iteration =  300, loss =   2.0043, accuracy =    26.60%
13:40:09 :    iteration =  400, loss =   2.0741, accuracy =    26.80%
13:40:20 :    iteration =  500, loss =   2.0712, accuracy =    26.20%
13:40:31 :    iteration =  600, loss =   1.9980, accuracy =    26.60%
13:40:43 :    iteration =  7

13:57:03 :    iteration =    0, loss =   2.0024, accuracy =    27.00%
13:57:15 :    iteration =  100, loss =   2.0116, accuracy =    27.30%
13:57:27 :    iteration =  200, loss =   1.9028, accuracy =    28.10%
13:57:39 :    iteration =  300, loss =   1.9932, accuracy =    27.10%
13:57:51 :    iteration =  400, loss =   2.0684, accuracy =    27.40%
13:58:03 :    iteration =  500, loss =   2.0680, accuracy =    27.30%
13:58:15 :    iteration =  600, loss =   1.9837, accuracy =    27.70%
13:58:27 :    iteration =  700, loss =   2.1170, accuracy =    26.80%
13:58:34 : epoch   57 took 92.54s
13:58:34 : epoch   58/ 100
13:58:35 :    iteration =    0, loss =   2.0010, accuracy =    27.40%
13:58:48 :    iteration =  100, loss =   2.0103, accuracy =    27.50%
13:59:01 :    iteration =  200, loss =   1.9019, accuracy =    28.30%
Model saved in path: E:\courses\stat231A\homework1\models\best_model_block1_and_block2.ckpt
13:59:01 : Current_Best_Acc:  28.299999999999997
13:59:14 :    iteration =  3

14:14:50 :    iteration =  700, loss =   2.1084, accuracy =    28.60%
14:14:56 : epoch   68 took 86.19s
14:14:56 : epoch   69/ 100
14:14:57 :    iteration =    0, loss =   1.9856, accuracy =    28.90%
14:15:08 :    iteration =  100, loss =   1.9984, accuracy =    29.00%
14:15:20 :    iteration =  200, loss =   1.8934, accuracy =    29.00%
14:15:31 :    iteration =  300, loss =   1.9875, accuracy =    27.90%
14:15:42 :    iteration =  400, loss =   2.0618, accuracy =    28.50%
14:15:54 :    iteration =  500, loss =   2.0665, accuracy =    27.80%
14:16:05 :    iteration =  600, loss =   1.9704, accuracy =    29.50%
Model saved in path: E:\courses\stat231A\homework1\models\best_model_block1_and_block2.ckpt
14:16:05 : Current_Best_Acc:  29.5
14:16:16 :    iteration =  700, loss =   2.1077, accuracy =    28.70%
14:16:23 : epoch   69 took 86.77s
14:16:23 : epoch   70/ 100
14:16:24 :    iteration =    0, loss =   1.9845, accuracy =    28.80%
14:16:35 :    iteration =  100, loss =   1.9974, ac

14:33:47 :    iteration =  400, loss =   2.0571, accuracy =    28.80%
14:33:59 :    iteration =  500, loss =   2.0659, accuracy =    28.30%
14:34:10 :    iteration =  600, loss =   1.9608, accuracy =    29.50%
14:34:22 :    iteration =  700, loss =   2.0992, accuracy =    29.10%
14:34:29 : epoch   81 took 88.06s
14:34:29 : epoch   82/ 100
14:34:30 :    iteration =    0, loss =   1.9706, accuracy =    28.90%
14:34:41 :    iteration =  100, loss =   1.9842, accuracy =    29.30%
14:34:52 :    iteration =  200, loss =   1.8856, accuracy =    29.80%
Model saved in path: E:\courses\stat231A\homework1\models\best_model_block1_and_block2.ckpt
14:34:52 : Current_Best_Acc:  29.799999999999997
14:35:04 :    iteration =  300, loss =   1.9801, accuracy =    28.40%
14:35:16 :    iteration =  400, loss =   2.0566, accuracy =    28.70%
14:35:29 :    iteration =  500, loss =   2.0658, accuracy =    28.10%
14:35:40 :    iteration =  600, loss =   1.9600, accuracy =    29.40%
14:35:52 :    iteration =  7

14:52:08 : epoch   93 took 87.74s
14:52:08 : epoch   94/ 100
14:52:08 :    iteration =    0, loss =   1.9560, accuracy =    29.40%
14:52:20 :    iteration =  100, loss =   1.9715, accuracy =    29.60%
14:52:31 :    iteration =  200, loss =   1.8764, accuracy =    29.70%
14:52:43 :    iteration =  300, loss =   1.9741, accuracy =    29.20%
14:52:54 :    iteration =  400, loss =   2.0515, accuracy =    28.90%
14:53:06 :    iteration =  500, loss =   2.0638, accuracy =    28.90%
14:53:17 :    iteration =  600, loss =   1.9522, accuracy =    30.30%
14:53:29 :    iteration =  700, loss =   2.0896, accuracy =    30.20%
14:53:36 : epoch   94 took 87.89s
14:53:36 : epoch   95/ 100
14:53:36 :    iteration =    0, loss =   1.9548, accuracy =    29.60%
14:53:48 :    iteration =  100, loss =   1.9703, accuracy =    29.50%
14:53:59 :    iteration =  200, loss =   1.8758, accuracy =    29.90%
14:54:11 :    iteration =  300, loss =   1.9734, accuracy =    29.10%
14:54:22 :    iteration =  400, loss =

In [26]:
print(train_losses)
print(test_accuracies)

[2.8028224, 2.498616, 2.3845265, 2.3270955, 2.292948, 2.26975, 2.2538009, 2.2421422, 2.233581, 2.2265036, 2.2195644, 2.2137423, 2.2086108, 2.2045758, 2.2013564, 2.1990685, 2.1968646, 2.1945095, 2.1925101, 2.190737, 2.188683, 2.1867652, 2.1850266, 2.1835935, 2.1820512, 2.1805875, 2.1791337, 2.1774926, 2.1761894, 2.174725, 2.1734984, 2.1721241, 2.1705844, 2.1691709, 2.1676197, 2.166097, 2.1647027, 2.163298, 2.1618605, 2.1603649, 2.1588771, 2.157621, 2.1562989, 2.1550128, 2.1536174, 2.1522708, 2.1508632, 2.149453, 2.1482677, 2.1471498, 2.1460822, 2.1449642, 2.1440048, 2.142964, 2.1417751, 2.1408324, 2.1398742, 2.1388, 2.1376953, 2.136705, 2.1355996, 2.1346817, 2.1337733, 2.1329713, 2.1321661, 2.131213, 2.1302238, 2.1294315, 2.128615, 2.1277387, 2.1268036, 2.1257877, 2.1247969, 2.1238341, 2.1230273, 2.1220298, 2.1210608, 2.1202438, 2.119468, 2.118433, 2.117371, 2.1166139, 2.1158073, 2.1150608, 2.1143858, 2.1136832, 2.1128745, 2.112221, 2.1114528, 2.1107767, 2.1102188, 2.1098492, 2.1095133,

In [None]:
train_writer = tf.summary.FileWriter('E:\\courses\\stat231A\\homework1\\models\\',
                                      sess.graph)

In [None]:
from bokeh.plotting import figure, output_notebook, show
from bokeh.io import export_png

In [None]:
# prepare some data
x = train_losses
y = test_accuracies

# create a new plot with a title and axis labels
p = figure(title="Training Loss over Epochs", x_axis_label='epoch', y_axis_label='training loss')

# add a line renderer with legend and line thickness
p.line(np.arange(len(x)), x, line_width=2)

# create a new plot with a title and axis labels
q = figure(title="Testing Accuracy over Epochs", x_axis_label='epoch', y_axis_label='accuracy')

# add a line renderer with legend and line thickness
q.line(np.arange(len(y)), y, line_width=2)

#show
output_notebook()
show(p)
show(q)

In [None]:
params_block1_only, params_val_block1_only, train_losses_block1_only, test_accuracies_block1_only \
= train(convnet_forward_block1_only, convnet_init_block1_only, learning_rate, epochs=epochs)

In [None]:
params_block1_and_block2, params_val_block1_and_block2, train_losses_block1_and_block2, test_accuracies_block1_and_block2 \
= train(convnet_forward_block1_and_block2, convnet_init_block1_and_block2, learning_rate, epochs=epochs)

In [None]:
params_block1_block2_and_block3, params_val_block1_block2_and_block3, train_losses_block1_block2_and_block3, test_accuracies_block1_block2_and_block3 \
= train(convnet_forward_block1_block2_and_block3, convnet_init_block1_block2_and_block3, learning_rate, epochs=epochs)

In [6]:
def plot_kernels_on_grid(kernel, grid_Y, grid_X, pad = 1):
    """
    Visualize convolutional features as an image.

    Args:
      kernel:            tensor of shape [Y, X, NumChannels, NumKernels]
      (grid_Y, grid_X):  shape of the grid. Require: NumKernels == grid_Y * grid_X
                           User is responsible of how to break into two multiples.
      pad:               number of black pixels around each filter (between them)

    Return:
      Tensor of shape [(Y+2*pad)*grid_Y, (X+2*pad)*grid_X, NumChannels, 1].
    """

    x_min = tf.reduce_min(kernel)
    x_max = tf.reduce_max(kernel)

    kernel1 = (kernel - x_min) / (x_max - x_min)

    # pad X and Y
    x1 = tf.pad(kernel1, tf.constant([[pad,pad], [pad, pad], [0,0], [0,0]]), mode='CONSTANT')

    # X and Y dimensions, w.r.t. padding
    Y = kernel1.get_shape()[0] + 2 * pad
    X = kernel1.get_shape()[1] + 2 * pad

    channels = kernel1.get_shape()[2]

    # put NumKernels to the 1st dimension
    x2 = tf.transpose(x1, (3, 0, 1, 2))
    # organize grid on Y axis
    x3 = tf.reshape(x2, tf.stack([grid_X, Y * grid_Y, X, channels]))

    # switch X and Y axes
    x4 = tf.transpose(x3, (0, 2, 1, 3))
    # organize grid on X axis
    x5 = tf.reshape(x4, tf.stack([1, X * grid_X, Y * grid_Y, channels]))

    # back to normal order (not combining with the next step for clarity)
    x6 = tf.transpose(x5, (2, 1, 3, 0))

    # to tf.image_summary order [batch_size, height, width, channels], where in this case batch_size == 1
    x7 = tf.transpose(x6, (3, 0, 1, 2))

    # scale to [0, 255] and convert to uint8
    return tf.image.convert_image_dtype(x7, dtype=tf.uint8)


grid = plot_kernels_on_grid(params_val[0], 4, 8)

NameError: name 'params_val' is not defined

In [None]:
saver = tf.train.Saver()
with tf.Session() as sess:
    ############################################################################
    # TODO: Retrieve image of kernels from symbolic 'grid' variable.           #
    ############################################################################
    saver.restore(sess, "E:\\courses\\stat231A\\homework1\\models\\final_model.ckpt")
    grid_val = sess.run(grid)

    ############################################################################
    #                             END OF YOUR CODE                             #
    ############################################################################
    plt.imshow(grid_val[0])
    plt.show()
    img = Image.fromarray(grid_val[0], 'RGB')
    img.save('3_kernels.jpeg')

In [None]:
params_val[0].shape

In [None]:
def plot_filter_grid(units):
    filters = units.shape[3]
    plt.figure(1, figsize=(20, 12))
    n_columns = 8
    n_rows = math.ceil(filters / n_columns) + 1
    for i in range(filters):
        plt.subplot(n_rows, n_columns, i+1)
        plt.title('Filter ' + str(i))
        plt.axis('off')
        plt.imshow(units[0, :, :, i], interpolation='nearest')


In [None]:
image = get_X_train_sample()
plt.imshow((image)[0], interpolation='nearest', )
#plt.figure(1, figsize=(10, 10))
plt.axis('off')
#plt.savefig('4_data.png', bbox_inches='tight')
plt.show()


In [None]:
def conv1_activations(x, conv_w1, conv_b1):
    ############################################################################
    # TODO: Compute activations for the first conv layer.                      #
    ############################################################################
    x1_1_pad = tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='CONSTANT', constant_values=0)
    x1_2_conv = tf.nn.conv2d(x1_1_pad, conv_w1, [1, 1, 1, 1], padding='VALID') + conv_b1
  
    ############################################################################
    #                             END OF YOUR CODE                             #
    ############################################################################
    return x1_2_conv

In [None]:
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, "E:\\courses\\stat231A\\homework1\\models\\final_model.ckpt")
    x = tf.placeholder(tf.float32, [None, 32, 32, 3])
    hidden_1 = sess.run(conv1_activations(x, params[0], params[1]), feed_dict={x: image, params[0]: params_val[0], params[1]: params_val[1]})
    plot_filter_grid(hidden_1)
    #plt.savefig('5_activations.png', bbox_inches='tight')
    plt.show()

In [7]:
np.array(image).shape

NameError: name 'image' is not defined