# TensorFlow demo

For purposes of learning how to git, and trying to make a nice notebook myself, this one creates, trains and  saves a model that performs pretty well on cifar-10.
The most interesting part perhaps, and the one which gave me most pain, was properly saving and reloading an arbitrary tf graph.

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def load_cifar10(num_training=49000, num_validation=1000, num_test=10000):
    """
    Fetch the CIFAR-10 dataset from the web and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 dataset and use appropriate data types and shapes
    cifar10 = tf.keras.datasets.cifar10.load_data()
    (X_train, y_train), (X_test, y_test) = cifar10
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int32).flatten()
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int32).flatten()

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    print(X_train.shape)
    # Normalize the data: subtract the mean pixel and divide by std
    mean_pixel = X_train.mean(axis=(0, 1, 2), keepdims=True)
    std_pixel = X_train.std(axis=(0, 1, 2), keepdims=True)
    X_train = (X_train - mean_pixel) / std_pixel
    X_val = (X_val - mean_pixel) / std_pixel
    X_test = (X_test - mean_pixel) / std_pixel

    return X_train, y_train, X_val, y_val, X_test, y_test


# Invoke the above function to get our data.
NHW = (0, 1, 2)
X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape, y_train.dtype)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

(49000, 32, 32, 3)
Train data shape:  (49000, 32, 32, 3)
Train labels shape:  (49000,) int32
Validation data shape:  (1000, 32, 32, 3)
Validation labels shape:  (1000,)
Test data shape:  (10000, 32, 32, 3)
Test labels shape:  (10000,)


In [3]:
#creates a class to manage batches. there are more convenient APIs in tf for this,
#but I'm remaining faithful to the original nb
class Dataset(object):
    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y
        
        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        N, B = self.X.shape[0], self.batch_size
        idxs = np.arange(N)
        if self.shuffle:
            np.random.shuffle(idxs)
        return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))


train_dset = Dataset(X_train, y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=64, shuffle=False)
test_dset = Dataset(X_test, y_test, batch_size=64)

In [4]:
#set up your gpu as the device to run the graph. Pleas make sure you have the
#tensorflow_gpu distribution, otherwise training our model may be painful
USE_GPU = True

if USE_GPU:
    device = '/device:GPU:0'
else:
    device = '/cpu:0'


print('Using device: ', device)

Using device:  /device:GPU:0


In [15]:
#again, there are better APIs on tensorflow for this, but this gives us a nice feel of how the underlying 
#graphs work

def acc(sess, dset, x, scores,is_training=None):
    """
    Check accuracy on a classification model.
    
    Inputs:
    - sess: A TensorFlow Session that will be used to run the graph
    - dset: A Dataset object on which to check accuracy
    - x: A TensorFlow placeholder Tensor where input images should be fed
    - scores: A TensorFlow Tensor representing the scores output from the
      model; this is the Tensor we will ask TensorFlow to evaluate.
      
    Returns: Nothing, but prints the accuracy of the model
    """
    num_correct, num_samples = 0, 0
    for x_batch, y_batch in dset:
        feed_dict = {x: x_batch, is_training: 0}
        scores_np = sess.run(scores, feed_dict=feed_dict)
        y_pred = scores_np.argmax(axis=1)
        num_samples += x_batch.shape[0]
        num_correct += (y_pred == y_batch).sum()
    acc = float(num_correct) / num_samples
    return acc

# Training loop
Now for the important stuff, here we estabilish the central training loop for our models. It's a whole more flexible than doing a model.fit

In [14]:
batch_size=120  #only really important for the training dataset
printevery=100
train_dset = Dataset(X_train, y_train, batch_size=batch_size, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=64,shuffle=False)
test_dset = Dataset(X_test, y_test,batch_size=64)
def train(model_init_fn, optimizer_init_fn, num_epochs=1,verbose=True,lr=1e-3):
    val_history=[]
    train_history=[]
    loss_hist=[]
#it's pretty important to always clean the tf graph that may be in your kernel
    tf.reset_default_graph()  
    
    with tf.device(device):
#crucial stuff here, we have to name some tensors in the graph so we can easily refer to them later
#when restoring the graph. x, y,loss, scores should suffice
#placeholders are variables we'll later feed into the graph
        x = tf.placeholder(tf.float32, [None, 32, 32, 3],'x') 
        y = tf.placeholder(tf.int32, [None],'y')
        is_training = tf.placeholder(tf.bool, name='is_training')

        
# calling the model fn to set up the graph associated to it
        scores = model_init_fn(x, is_training)
        scores=tf.identity(scores,'scores')
        # Compute the loss like we did in Part II
        print(y.shape)
        print(scores.shape)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=scores)
        loss = tf.reduce_mean(loss,name='loss')
#same thing for the optimizer
        optimizer=optimizer_init_fn(lr)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

#weird tf shit, we have to create a dependency from the update operations to the train_op tensor
#so we can actually calculate them.
#tf only really calculates the operations it needs to
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss)
    
    #the tf session is like an instance of the graph where the tensors and placeholders actually 
    #take form. Always needed when using tf
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer()) #initiate all tensors in the sess. Never forget
        t = 0
        
        #tf thingy we create to save the tensors living in our sess
        #if we call the empty constructor, it will save all variables in sess
        saver=tf.train.Saver()
        best_val=0 #best validation accuracy. That's what we want
        for epoch in range(num_epochs):
            print('Starting epoch %d' % epoch)
            for x_np, y_np in train_dset:
                feed_dict = {x: x_np, y: y_np, is_training:1} #feed the data into the graph
                loss_np, _ = sess.run([loss, train_op], feed_dict=feed_dict)
                if t % printevery== 0:
                    train=acc(sess, train_dset, x, scores, is_training=is_training)
                    val=acc(sess, val_dset, x, scores, is_training=is_training)
                    if(verbose):
                        print('%d, loss = %.4f, train: %.4f, val: %.4f'
                              % (t, loss_np,train,val))

                    val_history.append(val)
                    train_history.append(train)
                    saver.save(sess,"model1/model.cpkt") #save our thousands of parameters into the disk
                    if(val>best_val):
                        saver.save(sess,"model1/best_model.cpkt") #save our fav model
                        best_val=val
                loss_hist.append(loss_np)
                t += 1
    return loss_hist,train_history,val_history

Now here's the tits. 
- We'll first train our model using one optimizer, save it, and do some finer optimization, using lower learning rate on that model.That's done via the `keep_training`

In [13]:


def keep_training(optimizer_init_fn,num_epochs=1,verbose=True,lr=1e-5):
    train_history=[]
    val_history=[]
    loss_hist=[]
    tf.reset_default_graph()    #clean the graph
    saver=tf.train.import_meta_graph("model1/best_model.cpkt.meta") #set up the graph we had
    graph=tf.get_default_graph()
    x=graph.get_tensor_by_name('x:0')   #here we retake our important tensors
    y=graph.get_tensor_by_name('y:0')
    scores=graph.get_tensor_by_name('scores:0')
    is_training=graph.get_tensor_by_name('is_training:0')
    loss=graph.get_tensor_by_name('loss:0')
    with tf.device(device):
        optimizer = optimizer_init_fn(lr) #our brand new optimizer
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss)

    with tf.Session() as sess:
        t = 0
        #we need to initialize the variables of our new optimizer
        sess.run(tf.global_variables_initializer())
        #and restore our hard worked upon parameters
        saver.restore(sess,"model1/best_model.cpkt")
        saver=tf.train.Saver()
        best_val=0
        for epoch in range(num_epochs):
            print('Starting epoch %d' % epoch)
            for x_np, y_np in train_dset:
                feed_dict = {x: x_np, y: y_np, is_training:1} 
                loss_np, _ = sess.run([loss, train_op], feed_dict=feed_dict)
                if t % printevery== 0:
                    train=acc(sess, train_dset, x, scores, is_training=is_training)
                    val=acc(sess, val_dset, x, scores, is_training=is_training)
                    if(verbose):
                        print('%d, loss = %.4f, train: %.4f, val: %.4f'
                              % (t, loss_np,train,val))

                    val_history.append(val)
                    train_history.append(train)
                    saver.save(sess,"model2/model.cpkt") #don't forget to save
                    if(val>best_val):
                        saver.save(sess,"model2/best_model.cpkt")
                        best_val=val
                loss_hist.append(loss_np)
                t += 1
    return loss_hist,train_history,val_history
    
  

Our pretty model will have the architecture
- Conv32
- MaxPool
- Conv64
- Conv96
- Conv128
- MaxPool
- Conv160
- FC200
- FC10 == scores

All without regularization (don't ask me, it worked better this way), and details you'll easily see below


In [8]:
input_shape=(32,32,3)
def deep_conv(inputs,is_training):
    initializer=tf.variance_scaling_initializer(scale=2.0)
    filters=[32,64,96,128,160]
    fcs=[200]
    layers=[tf.layers.Conv2D(filters=filters[0],kernel_size=5,kernel_initializer=initializer,
                             activation=tf.nn.leaky_relu,
                             padding='same',input_shape=input_shape)]
    for i in range(1,len(filters)):
        layers.append(tf.layers.BatchNormalization())
        layers.append(tf.layers.Conv2D(filters=filters[i],kernel_size=5,kernel_initializer=initializer,
                             activation=tf.nn.leaky_relu, padding='same'))
        if(i%3==2):
            layers.append(tf.layers.MaxPooling2D(2,2))
       
        
            
    layers.append(tf.layers.Flatten())
    for fc in fcs:
        layers.append(tf.layers.Dense(fc,kernel_initializer=initializer,activation=tf.nn.leaky_relu,
                                      kernel_regularizer=tf.keras.regularizers.l2(reg)))
    layers.append(tf.layers.Dense(10))
    model=tf.keras.Sequential(layers)
    return model(inputs)

In [16]:

def optimizer_init_fn(lr):
    optimizer = optimizer = tf.train.RMSPropOptimizer(lr,momentum=0.3)
    return optimizer
reg=0
loss_hist,train_hist,val_hist=train(deep_conv, optimizer_init_fn,num_epochs=10,verbose=True)
loss_hist2,train_hist2,val_jist2=keep_training(tf.train.AdagradOptimizer,num_epochs=5)

(?,)
(?, 10)
Starting epoch 0
0, loss = 2.9948, train: 0.1020, val: 0.1120


KeyboardInterrupt: 

- Check our results in the test dataset. Only do at the very end

In [17]:
tf.reset_default_graph()    
saver=tf.train.import_meta_graph("model2/best_model.cpkt.meta")
graph=tf.get_default_graph()
x=graph.get_tensor_by_name('x:0')
y=graph.get_tensor_by_name('y:0')
scores=graph.get_tensor_by_name('scores:0')
is_training=graph.get_tensor_by_name('is_training:0')
with tf.Session() as sess:

    saver=tf.train.Saver()
    saver.restore(sess,"model2/best_model.cpkt")
    print(acc(sess,test_dset,x,scores,is_training))
    


INFO:tensorflow:Restoring parameters from model2/best_model.cpkt
0.7701
