Import packages

In [1]:
#Importing packages
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model, layers
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.utils import to_categorical

Define hyperparameter & Data info

In [2]:
#Learning Rate, Iterations, Batch Size - Hyperparameters
learning_rate = 0.0001
iterations = 40000
batch_size = 256
#dropout = 0.5

Load CIFAR-100 data

In [3]:
#CIFAR-100 Dataset has 60000 images of common objects, 600 images per class and 100 classes in total
(X_train, Y_train), (X_test, Y_test) = cifar100.load_data()

Data preparation

In [4]:
#Convert to float type
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

#One hot encoding of labels
Y_train = to_categorical(Y_train, 100)
Y_test = to_categorical(Y_test, 100)

#Normalization of images
X_train = X_train / 255.
X_test = X_test / 255.

#Reshape to [None, 3072] = 32 * 32 * 3
X_train = X_train.reshape(-1, 32*32*3)
X_test = X_test.reshape(-1, 32*32*3)

#Use Tensorflow data for shuffling and fetching it batchwise
train_data = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)

Define the model

In [5]:
class NeuralNet(Model):
    
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.L1 = layers.Dense(3072, activation=tf.nn.relu)
        #self.L2 = layers.Dense(1024, activation=tf.nn.relu)
        #self.L3 = layers.Dense(512, activation=tf.nn.relu)
        #self.L4 = layers.Dense(256, activation=tf.nn.relu)
        #self.L5 = layers.Dense(128, activation=tf.nn.relu)
        self.Yhat = layers.Dense(100)

    # Set forward pass.
    def call(self, x, is_training=False):
        x = self.L1(x)
        #x = self.L2(x)
        #x = self.L3(x)
        #x = self.L4(x)
        #x = self.L5(x)
        x = self.Yhat(x)
        if not is_training:
            x = tf.nn.softmax(x)
        return x

# Build the network
model = NeuralNet()

Define the cost function

In [6]:
#Cost function definition, using the softmax cross entropy loss
def cross_entropy(y_pred, y_true):
    
    y_true = tf.cast(y_true, tf.int64)
    loss = tf.nn.softmax_cross_entropy_with_logits(labels = y_true, logits = y_pred)
    
    return tf.reduce_mean(loss)

Metrics to check prediction

In [7]:
#To verify the predictions
def accuracy(y_pred, y_true):
    
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    a = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)
    
    return a

Define Optimizer

In [8]:
#Setup the optimizer function with learning rate as parameter
optimizer = tf.optimizers.Adam(learning_rate)

Put it all together

In [9]:
def run(X, Y):
    
    #Using GradientTape for automatic differentiation.
    with tf.GradientTape() as tape:
        pred = model(X, is_training = True)
        loss = cross_entropy(pred, Y)
        
    #Get W and B values 
    train_variables = model.trainable_variables

    #Compute gradients.
    gradients = tape.gradient(loss, train_variables)
    
    #Update W and b following gradients
    optimizer.apply_gradients(zip(gradients, train_variables))

Let's train for 40000 iterations

In [10]:
#Run the for-loop for a well defined number of epochs

s = time.clock()
loss_plot = []
accuracy_plot = []

for i, (Batch_x, Batch_y) in enumerate(train_data.take(40001), 1):
    # Run the optimization to update W and b values.
    run(Batch_x, Batch_y)
    
    if i % 2000 == 0:
        pred = model(Batch_x, is_training = True)
        loss = cross_entropy(pred, Batch_y)
        acc = accuracy(pred, Batch_y)
        print('Iteration = {}'.format(i))
        print('Loss = {}'.format(loss))
        print('Accuracy = {}\n'.format(acc))
        
        loss_plot.append(loss)
        accuracy_plot.append(acc)
        
e = time.clock()

print('Time Elapsed = {} \n'.format(e-s))

pred = model(X_test, is_training = False)
test_accuracy = accuracy(pred, Y_test)
print('Accuracy on Test Set is {}'.format(test_accuracy))

  This is separate from the ipykernel package so we can avoid doing imports until


InvalidArgumentError: Incompatible shapes: [256] vs. [256,100] [Op:Equal]

Plotting the graphs

In [None]:
import matplotlib.pyplot as plt

iterations = np.arange(1000, 40001, 2000)
plt.plot(iterations, loss_plot, label = 'Training Loss')
plt.title('Loss graph')
plt.xlabel('Epochs')
plt.xticks(np.arange(1000, 40001, 2000))
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
iterations = np.arange(1000, 40001, 2000)
plt.plot(iterations, accuracy_plot, label = 'Training Accuracy')
plt.title('Accuracy graph')
plt.xlabel('Epochs')2
plt.xticks(np.arange(1000, 40001, 2000))
plt.ylabel('Accuracy')
plt.legend()
plt.show()