<a href="https://colab.research.google.com/github/kishore145/AI-ML-Foundations/blob/master/Neural%20Networks/18_VGG16_using_tensorlfow_on_mnist_and_cifar.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Architecture  

VGG16 is a configuration with 16 weight layers (13 convolutional and 3 fully connected).  
You can read more about it in the original paper : https://arxiv.org/pdf/1409.1556.pdf

| VGG16 Configuration |
| :-----------------: | 
| input               | 
| ------------------- | 
| conv3 - 64 |
| conv3 - 64 |
| maxpool |
| ------------------- | 
| conv3 - 128 | 
| conv3 - 128 |
| maxpool |
| ------------------- | 
| conv3 - 256 | 
| conv3 - 256 |
| conv3 - 256 |
| maxpool |
| ------------------- | 
| conv3 - 512 | 
| conv3 - 512 |
| conv3 - 512 |
| maxpool |
| ------------------- | 
| conv3 - 512 | 
| conv3 - 512 |
| conv3 - 512 |
| maxpool |
| ------------------- | 
| FC - 4096 | 
| FC - 4096 | 
| FC - 1000 |
| softmax |  







# Load dependencies and pre-process data

In [1]:
# Load dependencies
import tensorflow
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization

from tensorflow.keras.callbacks import ModelCheckpoint
import os

In [2]:
# Load the mnist train / validation splits as (features, labels) pairs
(
    (X_train_mnist, y_train_mnist),
    (X_valid_mnist, y_valid_mnist),
) = mnist.load_data()

In [3]:
# Load the cifar10 train / validation splits as (features, labels) pairs
(
    (X_train_cifar, y_train_cifar),
    (X_valid_cifar, y_valid_cifar),
) = cifar10.load_data()

In [4]:
# Check the shape of the data to ensure data is downloaded correctly
def datashape(X_train, y_train, X_valid, y_valid):
  """Print the shape of each train / validation array, one per line.

  Args:
    X_train: Training features; must expose a ``shape`` attribute.
    y_train: Training labels; must expose a ``shape`` attribute.
    X_valid: Validation features; must expose a ``shape`` attribute.
    y_valid: Validation labels; must expose a ``shape`` attribute.

  Returns:
    None. The report is written to stdout.
  """
  labelled_arrays = (
      ('X_train', X_train),
      ('y_train', y_train),
      ('X_valid', X_valid),
      ('y_valid', y_valid),
  )
  # Each entry is indented by two spaces and separated by ",\n"; the report
  # starts with an empty line.
  report_lines = [f'  {label} shape: {array.shape}' for label, array in labelled_arrays]
  print('\n' + ',\n'.join(report_lines))


In [5]:
# Review the raw shape of the feature arrays and target labels (before preprocessing)
print('Shape of mnist data set:')
datashape(X_train_mnist, y_train_mnist, X_valid_mnist, y_valid_mnist)
print('Shape of cifar data set:')
datashape(X_train_cifar, y_train_cifar, X_valid_cifar, y_valid_cifar)

Shape of mnist data set:

  X_train shape: (60000, 28, 28),
  y_train shape: (60000,),
  X_valid shape: (10000, 28, 28),
  y_valid shape: (10000,)
Shape of cifar data set:

  X_train shape: (50000, 32, 32, 3),
  y_train shape: (50000, 1),
  X_valid shape: (10000, 32, 32, 3),
  y_valid shape: (10000, 1)


In [6]:
# Reshape the numpy arrays to 4D and cast them to float32
# Preprocess data - Input to conv is a 4D tensor with shape (batch_size, rows, cols, channels) by default, which can be changed with data_format.
def preprocess(X_train, y_train, X_valid, y_valid, X_trainshape, X_validshape, n_classes):
  """Reshape and rescale the images and one-hot encode the labels.

  Args:
    X_train: Training images (numpy array).
    y_train: Training labels, passed to ``to_categorical``.
    X_valid: Validation images (numpy array).
    y_valid: Validation labels, passed to ``to_categorical``.
    X_trainshape: Target shape for the training images.
    X_validshape: Target shape for the validation images.
    n_classes: Number of classes used for the one-hot encoding.

  Returns:
    Tuple ``(X_train, y_train, X_valid, y_valid)`` holding the float32 images
    divided by 255 and the one-hot encoded labels.
  """
  def _rescale(images, target_shape):
    # astype() returns a new float32 array, so the in-place division below
    # never modifies the array that was passed in.
    pixels = images.reshape(target_shape).astype('float32')
    pixels /= 255.
    return pixels

  train_images = _rescale(X_train, X_trainshape)
  valid_images = _rescale(X_valid, X_validshape)

  train_labels = to_categorical(y_train, n_classes)
  valid_labels = to_categorical(y_valid, n_classes)

  return train_images, train_labels, valid_images, valid_labels


In [7]:
# Number of target classes; passed to preprocess() for one-hot encoding the labels
n_classes = 10

In [8]:
# NOTE: this cell overwrites the raw arrays with their preprocessed versions, so
# re-run the data loading cells before running it a second time (otherwise the
# images would be divided by 255 twice and the labels one-hot encoded twice).
# The leading -1 lets numpy infer the number of samples instead of hard-coding it.
X_train_mnist, y_train_mnist, X_valid_mnist, y_valid_mnist = preprocess(
    X_train_mnist, y_train_mnist,
    X_valid_mnist, y_valid_mnist,
    (-1, 28, 28, 1), (-1, 28, 28, 1),  # add the single grayscale channel axis
    n_classes)

X_train_cifar, y_train_cifar, X_valid_cifar, y_valid_cifar = preprocess(
    X_train_cifar, y_train_cifar,
    X_valid_cifar, y_valid_cifar,
    (-1, 32, 32, 3), (-1, 32, 32, 3),  # cifar already carries 3 colour channels
    n_classes)

# Review the revised shape of feature vector and target labels
print('Shape of mnist data set:')
datashape(X_train_mnist, y_train_mnist, X_valid_mnist, y_valid_mnist)
print('Shape of cifar data set:')
datashape(X_train_cifar, y_train_cifar, X_valid_cifar, y_valid_cifar)

Shape of mnist data set:

  X_train shape: (60000, 28, 28, 1),
  y_train shape: (60000, 10),
  X_valid shape: (10000, 28, 28, 1),
  y_valid shape: (10000, 10)
Shape of cifar data set:

  X_train shape: (50000, 32, 32, 3),
  y_train shape: (50000, 10),
  X_valid shape: (10000, 32, 32, 3),
  y_valid shape: (10000, 10)


# Design VGG16 neural network

In [9]:
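# VGG16 adapted to the small cifar & mnist images. No stride argument is set below (Keras defaults are used);
# the size-related tweak is padding='same' on the fourth max-pooling layer.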

def vgg16(input_shape, n_classes=10):
  """Build and compile a VGG16-style Sequential model for small images.

  The network stacks five conv/pool blocks (2, 2, 3, 3 and 3 conv layers with
  64, 128, 256, 512 and 512 filters). Each block ends with max pooling and
  batch normalization. They are followed by three dense layers (4096, 4096,
  1000 units) with dropout and a softmax output layer.

  Args:
    input_shape: Shape of one input image as (rows, cols, channels),
      e.g. (28, 28, 1) or (32, 32, 3).
    n_classes: Number of units in the softmax output layer. Defaults to 10.

  Returns:
    The compiled Sequential model (categorical cross-entropy loss, nadam
    optimizer, accuracy metric). The model summary is printed as a side effect.
  """
  # (filters, number of conv layers, max-pool padding) for each conv/pool block.
  # The fourth pool uses 'same' padding: with a 28x28 input its feature map is
  # 3x3, and 'valid' pooling would shrink it to 1x1, which the fifth 2x2 pool
  # could not reduce any further (this raised an error).
  conv_blocks = [
      (64, 2, 'valid'),
      (128, 2, 'valid'),
      (256, 3, 'valid'),
      (512, 3, 'same'),
      (512, 3, 'valid'),
  ]

  # Create model
  model = Sequential()

  # Conv pool blocks:
  is_first_layer = True
  for filters, n_convs, pool_padding in conv_blocks:
    for _ in range(n_convs):
      conv_kwargs = dict(filters=filters, kernel_size=(3, 3), padding='same',
                         activation='relu')
      if is_first_layer:
        # Only the very first layer of a Sequential model declares the input shape
        conv_kwargs['input_shape'] = input_shape
        is_first_layer = False
      model.add(Conv2D(**conv_kwargs))
    model.add(MaxPooling2D(pool_size=(2, 2), padding=pool_padding))
    model.add(BatchNormalization())

  # Fully connected dense neural networks:
  model.add(Flatten())
  for units in (4096, 4096, 1000):
    model.add(Dense(units=units, activation='relu'))
    model.add(Dropout(0.5))

  # O/p layer
  model.add(Dense(n_classes, activation='softmax'))

  # compile model
  model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])

  # Display model summary (summary() prints by itself and returns None, so it
  # must not be wrapped in print())
  model.summary()

  # Return model
  return model


In [10]:
# Build the network for 28x28 single-channel mnist images
mnist_model = vgg16(input_shape=(28, 28, 1))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 64)        640       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 64)        36928     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 14, 14, 64)        256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 128)       73856     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 128)       147584    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 128)         0

In [11]:
# Setting model checkpoint
# Set o/p directory
output_dir_mnist = 'model_output/vgg16/mnist'

# Make sure the o/p directory exists before any checkpoint is written;
# exist_ok=True makes this cell safe to re-run.
os.makedirs(output_dir_mnist, exist_ok=True)

# Write a checkpoint only for epochs that improve the validation accuracy
modelcheckpoint = ModelCheckpoint(filepath=output_dir_mnist + '/weights.{epoch:02d}.hdf5',
                                  monitor='val_accuracy', save_best_only=True)

In [12]:
# Train on mnist, validating on the held-out split and checkpointing through the callback
mnist_model.fit(
    X_train_mnist,
    y_train_mnist,
    batch_size=128,
    epochs=4,
    verbose=1,
    validation_data=(X_valid_mnist, y_valid_mnist),
    callbacks=[modelcheckpoint],
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7fee30e57ef0>

In [13]:
# Setting model checkpoint
# Set o/p directory
output_dir_cifar = 'model_output/vgg16/cifar'

# Make sure the o/p directory exists before any checkpoint is written;
# exist_ok=True makes this cell safe to re-run.
os.makedirs(output_dir_cifar, exist_ok=True)

# Write a checkpoint only for epochs that improve the validation accuracy
modelcheckpoint = ModelCheckpoint(filepath=output_dir_cifar + '/weights.{epoch:02d}.hdf5',
                                  monitor='val_accuracy', save_best_only=True)

In [18]:
# Build the network for 32x32 three-channel cifar images
cifar_model = vgg16(input_shape=(32, 32, 3))


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_26 (Conv2D)           (None, 32, 32, 64)        1792      
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 16, 16, 64)        0         
_________________________________________________________________
batch_normalization_10 (Batc (None, 16, 16, 64)        256       
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 16, 16, 128)       73856     
_________________________________________________________________
conv2d_29 (Conv2D)           (None, 16, 16, 128)       147584    
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 8, 8, 128)        

In [19]:
# Train on cifar, validating on the held-out split and checkpointing through the callback
cifar_model.fit(
    X_train_cifar,
    y_train_cifar,
    batch_size=128,
    epochs=20,
    verbose=1,
    validation_data=(X_valid_cifar, y_valid_cifar),
    callbacks=[modelcheckpoint],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fed7c1f2780>

# Model evaluation

In [16]:
# Evaluating mnist model -
# The checkpoint callback was created with save_best_only=True, so a file is only
# written for epochs that improve val_accuracy. Which epochs get saved differs from
# run to run, so pick the most recently written checkpoint instead of hard-coding
# an epoch number.
mnist_checkpoints = [
    os.path.join(output_dir_mnist, file_name)
    for file_name in os.listdir(output_dir_mnist)
    if file_name.startswith('weights.') and file_name.endswith('.hdf5')
]
if not mnist_checkpoints:
  raise FileNotFoundError(f'No checkpoints found in {output_dir_mnist}')
best_mnist_weights = max(mnist_checkpoints, key=os.path.getmtime)
print(f'Loading weights from {best_mnist_weights}')

mnist_model.load_weights(filepath=best_mnist_weights)
mnist_model.evaluate(X_valid_mnist, y_valid_mnist)



[0.08573029935359955, 0.9836999773979187]

In [20]:
# Evaluating cifar model -
# The checkpoint callback was created with save_best_only=True, so a file is only
# written for epochs that improve val_accuracy. Which epochs get saved differs from
# run to run, so pick the most recently written checkpoint instead of hard-coding
# an epoch number.
cifar_checkpoints = [
    os.path.join(output_dir_cifar, file_name)
    for file_name in os.listdir(output_dir_cifar)
    if file_name.startswith('weights.') and file_name.endswith('.hdf5')
]
if not cifar_checkpoints:
  raise FileNotFoundError(f'No checkpoints found in {output_dir_cifar}')
best_cifar_weights = max(cifar_checkpoints, key=os.path.getmtime)
print(f'Loading weights from {best_cifar_weights}')

cifar_model.load_weights(filepath=best_cifar_weights)
cifar_model.evaluate(X_valid_cifar, y_valid_cifar)



[0.8399996161460876, 0.8317000269889832]

In [None]:
# As is evident, the vgg16 architecture starts overfitting on mnist from the second epoch.
# This very deep architecture was not designed for small images such as those in cifar and mnist,
# but we have used these datasets here to study the architecture.