<a href="https://colab.research.google.com/github/cwerries/IANNWTF_Group_14_Submissions/blob/master/Homework_6_Resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports

In [None]:
import numpy as np
import os

#%tensorflow_version 2.x
import tensorflow as tf

import matplotlib.pyplot as plt


Loading the CIFAR-10 dataset

In [None]:
# Load the CIFAR-10 train/test split through the Keras dataset helper.
(training_inputs, training_labels), (test_inputs, test_labels) = (
    tf.keras.datasets.cifar10.load_data()
)

# Print the array shapes as a quick sanity check of the split sizes.
print(f"Training inputs: {training_inputs.shape}")
print(f"Training labels: {training_labels.shape}")
print(f"Test inputs: {test_inputs.shape}")
print(f"Test labels: {test_labels.shape}")

Training inputs: (50000, 32, 32, 3)
Training labels: (50000, 1)
Test inputs: (10000, 32, 32, 3)
Test labels: (10000, 1)


Input pipeline

In [None]:
batch_size = 64


def _make_dataset(inputs, labels, batch_size, shuffle):
  """Build a batched tf.data pipeline from in-memory images and integer labels.

  Args:
    inputs: Image array; pixel values are assumed to lie in [0, 255].
    labels: Integer class labels with a trailing dimension of size 1
      (the CIFAR-10 loader returns shape (N, 1)).
    batch_size: Number of examples per batch.
    shuffle: If True, examples are reshuffled on every pass over the dataset.

  Returns:
    A tf.data.Dataset yielding (images scaled to [-1, 1], one-hot targets of depth 10).
  """
  dataset = tf.data.Dataset.from_tensor_slices((inputs, labels))

  if shuffle:
    # Shuffle individual examples *before* batching. Shuffling after batching only
    # permutes whole batches, so every epoch would see the same batch contents.
    # The buffer covers the full dataset and is filled with the raw (un-normalized)
    # elements because the shuffle precedes the map.
    dataset = dataset.shuffle(buffer_size = len(inputs))

  def preprocess(inp, tar):
    # Scale pixels from [0, 255] to [-1, 1].
    image = 2 * (tf.cast(inp, tf.float32) / 255) - 1
    # tf.squeeze removes the size-1 label dimension so one_hot yields shape (10,).
    target = tf.one_hot(tf.squeeze(tar), 10)
    return image, target

  dataset = dataset.map(preprocess, num_parallel_calls = tf.data.AUTOTUNE)
  dataset = dataset.batch(batch_size)
  # Let tf.data pick the prefetch depth instead of buffering batch_size*2 whole batches.
  return dataset.prefetch(tf.data.AUTOTUNE)


# Only the training data is shuffled; evaluation metrics do not depend on example order.
training_dataset = _make_dataset(training_inputs, training_labels, batch_size, shuffle = True)
test_dataset = _make_dataset(test_inputs, test_labels, batch_size, shuffle = False)

In [None]:
class ResidualBlock(tf.keras.layers.Layer):
  """Residual block of a ResNet: three convolutions plus an identity shortcut.

  The main path is
    1x1 conv -> batch norm -> ReLU ->
    3x3 conv -> batch norm -> ReLU -> dropout ->
    1x1 conv -> batch norm -> ReLU
  and its result is added element-wise to the block input, so inputs can
  propagate through the shortcut connection across layers.

  Because of the element-wise addition, the number of channels of the block
  input must equal `filters` (all convolutions use 'same' padding, so the
  spatial size is unchanged).

  Args:
    filters: Number of output channels of every convolution in the block.
    dropout_rate: Drop probability of the dropout layer after the 3x3 convolution.
    l2_factor: L2 kernel-regularization factor applied to every convolution.
    **kwargs: Forwarded to tf.keras.layers.Layer (e.g. `name`).
  """

  def __init__(self, filters = 120, dropout_rate = 0.5, l2_factor = 0.01, **kwargs):
    super(ResidualBlock, self).__init__(**kwargs)

    def make_conv(kernel_size):
      # All convolutions of the block share every setting except the kernel size.
      return tf.keras.layers.Conv2D(filters = filters,
                                    kernel_size = kernel_size,
                                    activation = None,
                                    padding = 'same',
                                    kernel_initializer = tf.keras.initializers.GlorotNormal(),
                                    bias_initializer = 'zeros',
                                    kernel_regularizer = tf.keras.regularizers.L2(l2_factor))

    # Sub-layers are created in the same order (and under the same attribute
    # names) as before so auto-generated layer names stay stable.
    self.conv_1 = make_conv(1)
    self.batch_norm_1 = tf.keras.layers.BatchNormalization()
    self.activ_1 = tf.keras.activations.relu

    self.conv_2 = make_conv(3)
    self.batch_norm_2 = tf.keras.layers.BatchNormalization()
    self.activ_2 = tf.keras.activations.relu
    self.dropout_2 = tf.keras.layers.Dropout(rate = dropout_rate)

    self.conv_3 = make_conv(1)
    self.batch_norm_3 = tf.keras.layers.BatchNormalization()
    self.activ_3 = tf.keras.activations.relu


  @tf.function
  def call(self, block_input, training = True):
    """Apply the block.

    Args:
      block_input: Feature map whose channel count equals the block's filter count.
      training: Forwarded to the batch-norm and dropout layers to select
        training or inference behavior.

    Returns:
      `block_input` plus the output of the convolutional path (same shape as the input).
    """
    # `training` is passed by keyword so it cannot be mistaken for another positional argument.
    x = self.conv_1(block_input)
    x = self.batch_norm_1(x, training = training)
    x = self.activ_1(x)

    x = self.conv_2(x)
    x = self.batch_norm_2(x, training = training)
    x = self.activ_2(x)
    x = self.dropout_2(x, training = training)

    x = self.conv_3(x)
    x = self.batch_norm_3(x, training = training)
    x = self.activ_3(x)

    # Identity shortcut connection.
    return block_input + x

In [None]:
class ResNet(tf.keras.Model):
  """Residual convolutional network made of a feature extractor and a classifier.

  Feature extractor: 3x3 convolution (120 filters) -> batch norm -> ReLU -> dropout,
  followed by `num_residual_blocks` ResidualBlock layers that add identity
  shortcut connections.
  Classifier: global average pooling -> dropout -> dense softmax layer with 10 units.

  Args:
    num_residual_blocks: How many ResidualBlock layers to stack.
  """

  def __init__(self, num_residual_blocks):
    super(ResNet, self).__init__()

    # --- Feature extractor stem ---
    stem_regularizer = tf.keras.regularizers.L2(0.01)
    self.conv_1 = tf.keras.layers.Conv2D(
        filters = 120,
        kernel_size = 3,
        activation = None,
        padding = 'same',
        kernel_initializer = tf.keras.initializers.glorot_normal,
        bias_initializer = 'zeros',
        kernel_regularizer = stem_regularizer,
        input_shape = (32, 32, 3),
    )
    self.batch_norm_1 = tf.keras.layers.BatchNormalization()
    self.activ_1 = tf.keras.activations.relu
    self.dropout_1 = tf.keras.layers.Dropout(rate = 0.4)

    # --- Stack of residual blocks ---
    self.residual_blocks = [ResidualBlock() for _ in range(num_residual_blocks)]

    # --- Classifier head ---
    self.global_pool = tf.keras.layers.GlobalAveragePooling2D()
    self.dropout = tf.keras.layers.Dropout(rate = 0.4)
    self.output_layer = tf.keras.layers.Dense(10, activation = tf.keras.activations.softmax)

  @tf.function
  def call(self, x, training = True):
    """Run the network on a batch of images.

    Args:
      x: Batch of input images.
      training: Forwarded to batch norm, dropout and the residual blocks.

    Returns:
      The softmax output of the final dense layer (10 values per example).
    """
    features = self.conv_1(x)
    features = self.batch_norm_1(features, training = training)
    features = self.activ_1(features)
    features = self.dropout_1(features, training = training)

    for block in self.residual_blocks:
      features = block(features, training = training)

    pooled = self.global_pool(features)
    pooled = self.dropout(pooled, training = training)
    return self.output_layer(pooled)

In [None]:
#@tf.function
def training_step(model, training_data, loss_fn, optimizer, training = True):
  """Train `model` for one full pass over `training_data`.

  For every batch the loss (data loss plus the sum of `model.losses`, i.e. the
  regularization penalties) is computed, back-propagated and applied with
  `optimizer`.

  Args:
    model: Callable Keras model exposing `losses` and `trainable_variables`.
    training_data: Iterable of (inputs, one-hot targets) batches.
    loss_fn: Callable `loss_fn(target, prediction)` returning a scalar tensor.
      Assumed to return the mean loss over the batch (the Keras default
      reduction); the sample weighting below relies on that.
    optimizer: Keras optimizer used to apply the gradients.
    training: Forwarded to the model call.

  Returns:
    (loss, accuracy) as Python floats, averaged over all *samples* seen in this
    pass. Both are NaN if `training_data` yields no batches.
  """
  loss_sum = 0.0
  num_correct = 0
  num_samples = 0

  for (inputs, target) in training_data:
    # Only the forward pass needs to be recorded on the tape.
    with tf.GradientTape() as tape:
      prediction = model(inputs, training = training)
      current_training_loss = loss_fn(target, prediction) + tf.math.reduce_sum(model.losses)
    gradients = tape.gradient(current_training_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    hits = np.argmax(target, axis=1) == np.argmax(prediction, axis=1)
    batch_len = hits.shape[0]

    # Weight every batch by its size: the last batch is usually smaller, so a
    # plain mean of per-batch means would over-weight its samples.
    loss_sum += float(current_training_loss.numpy()) * batch_len
    num_correct += int(np.sum(hits))
    num_samples += batch_len

  if num_samples == 0:
    # Nothing was processed; report NaN without triggering NumPy's empty-mean warning.
    return float('nan'), float('nan')

  training_loss = loss_sum / num_samples
  training_accuracy = num_correct / num_samples
  return training_loss, training_accuracy


#@tf.function
def test(model, test_data, loss_fn, training = False):
  test_losses = []
  test_accuracies = []

  for (input, target) in test_data:
    prediction = model(input, training)
    
    current_test_loss = loss_fn(target, prediction)
    test_losses.append(current_test_loss.numpy())

    current_test_accuracy = np.argmax(target, axis=1) == np.argmax(prediction, axis=1)
    test_accuracies.append(np.mean(current_test_accuracy))   
    
  test_loss = np.mean(test_losses)
  test_accuracy = np.mean(test_accuracies)
  return test_loss, test_accuracy

In [None]:
# Reset Keras' global state before building a fresh model.
tf.keras.backend.clear_session()

# Hyperparameters.
num_residual_blocks = 6
n_epochs = 10
learning_rate = 0.0001

# Model, loss and optimizer.
model = ResNet(num_residual_blocks)
loss_fn = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate, amsgrad = True)

# Per-epoch metric history; read again by the plotting cell.
training_losses, training_accuracies = [], []
test_losses, test_accuracies = [], []

# One training pass followed by one evaluation pass per epoch.
for epoch in range(n_epochs):
    print(f'Epoch {epoch}')

    training_loss, training_accuracy = training_step(
        model, training_dataset, loss_fn, optimizer, training = True
    )
    training_losses.append(training_loss)
    training_accuracies.append(training_accuracy)

    test_loss, test_accuracy = test(
        model, test_dataset, loss_fn, training = False
    )
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)

    print(f"Training accuracy: {training_accuracy}")
    print(f"Test accuracy: {test_accuracy}")

Epoch 0
Training accuracy: 0.18446291560102301
Test accuracy: 0.2177547770700637
Epoch 1
Training accuracy: 0.2805306905370844
Test accuracy: 0.37798566878980894
Epoch 2
Training accuracy: 0.3651094948849105
Test accuracy: 0.43869426751592355
Epoch 3
Training accuracy: 0.4212156329923274
Test accuracy: 0.484375
Epoch 4
Training accuracy: 0.4547434462915601
Test accuracy: 0.5218949044585988
Epoch 5
Training accuracy: 0.4827365728900256
Test accuracy: 0.4635748407643312
Epoch 6
Training accuracy: 0.5111492966751918
Test accuracy: 0.48835589171974525
Epoch 7
Training accuracy: 0.5277533567774936
Test accuracy: 0.4856687898089172
Epoch 8


In [None]:
def _plot_history(training_values, test_values, ylabel):
  """Plot a training curve and a test curve of one metric on shared axes.

  Args:
    training_values: Sequence with one training-set value per epoch.
    test_values: Sequence with one test-set value per epoch.
    ylabel: Name of the plotted metric, used as the y-axis label.
  """
  fig, ax = plt.subplots()
  # Labels are attached to the curves themselves so the legend cannot get out
  # of sync with the plotting order (previously the two labels were swapped).
  ax.plot(training_values, label = "Training")
  ax.plot(test_values, label = "Test")
  # One value is recorded per epoch, so the x-axis counts epochs, not training steps.
  ax.set_xlabel("Epoch")
  ax.set_ylabel(ylabel)
  ax.legend()
  plt.show()


_plot_history(training_losses, test_losses, "Loss")
_plot_history(training_accuracies, test_accuracies, "Accuracy")