## Classification of VOC 2012 Dataset using VGG19

**Created:** 04/21/21

**Author:** Muqtada Hussain Mohammed

**Email:** muqtada.husn@gmail.com

Download the VOC 2012 dataset. If the link below is broken, get the current one from http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/#data

In [None]:
%%bash
# Fetch the VOC 2012 train/val archive into the current working directory.
gdown http://host.robots.ox.ac.uk:8080/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
# Unpack it quietly: without -v tar prints no file listing, so nothing needs redirecting.
tar -xf VOCtrainval_11-May-2012.tar

Import the libraries needed to build the neural network. The model is built primarily with Keras.

In [None]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg19 import VGG19,preprocess_input,decode_predictions
from tensorflow.keras.applications import VGG19
from keras.models import Sequential
from keras.layers import Activation, Dense
from imutils import paths
from pathlib import Path
import xml.etree.ElementTree as ET
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import matplotlib.pyplot as plt


Mount Google Drive only if necessary, i.e. if the files you need are stored on Google Drive. This step works only on Google Colab.

In [None]:
# Optional, Colab-only step: mount Google Drive at /content/drive so that
# files stored there can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

Loading a pre-trained VGG19 model

In [None]:
# Load VGG19 with ImageNet weights, keeping its fully-connected top layers
# (include_top=True); the second-to-last layer is reused when the VOC head is built.
model = VGG19(weights='imagenet', include_top=True)

Model summary of VGG19

In [None]:
# Print the layer-by-layer summary of the loaded VGG19 model.
model.summary()

In [None]:
# Swap VGG19's final layer for a 20-unit softmax layer, one unit per VOC 2012 class.
# layers[-2] is the layer that feeds the original output layer.
last_layer = model.layers[-2].output
voc_output = Dense(20, activation='softmax', name='VOC2012')(last_layer)
new_model = Model(model.input, voc_output, name='VGG19_VOC2012Classification')

# Freeze every layer except the new head so that only the head's weights are trained.
for layer in new_model.layers[:-1]:
    layer.trainable = False

# NOTE(review): the label vectors built during preprocessing can have several
# entries set to 1 for one image; softmax + categorical cross-entropy treats the
# classes as mutually exclusive -- confirm this is the intended objective.
new_model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
new_model.summary()

Initializing data (x), label(y), class and path variables

In [None]:
# Accumulators filled by the preprocessing loop: image tensors and label vectors.
data = []
labels = []

# VOC 2012 class name -> position of that class in the 20-element label vector.
classes = {
    name: index
    for index, name in enumerate([
        'person', 'bird', 'cat', 'cow', 'dog', 'horse', 'sheep',
        'aeroplane', 'bicycle', 'boat', 'bus', 'car', 'motorbike', 'train',
        'bottle', 'chair', 'diningtable', 'pottedplant', 'sofa', 'tvmonitor',
    ])
}

# Iterator over every .jpg file below the dataset's image directory.
pathlist = Path('/content/VOCdevkit/VOC2012/JPEGImages').rglob('*.jpg')

Preprocessing Data

In [None]:
# Build the training set: for every JPEG in `pathlist`, append one preprocessed
# image tensor to `data` and one 20-element 0/1 label vector to `labels`.
#
# No ImageNet prediction is run here: its result was only needed by a debug
# print, and a forward pass through VGG19 for every image is expensive.
for p in pathlist:
  # Load at VGG19's 224x224 input size, add a leading batch axis and apply
  # the VGG19-specific preprocessing.
  img = image.load_img(str(p), target_size=(224, 224))
  x = image.img_to_array(img)
  x = np.expand_dims(x, axis=0)
  x = preprocess_input(x)
  data.append(x)

  # The annotation file has the same name as the image, with an .xml
  # extension, under 'Annotations' instead of 'JPEGImages'.
  lbl_path = str(p).replace('.jpg', '.xml').replace('JPEGImages', 'Annotations')
  root = ET.parse(lbl_path).getroot()

  # Distinct class names of all annotated objects: the <name> elements that
  # are direct children of the top-level <object> elements.
  act_label = {
      name.text
      for obj in root.findall('object')
      for name in obj.findall('name')
  }

  # Encode the label set as a 0/1 vector indexed by `classes`; an image showing
  # several classes gets several entries set to 1.
  y_label = np.zeros(len(classes), dtype=int)
  for class_name in act_label:
    y_label[classes[class_name]] = 1
  labels.append(y_label)

Converting and reshaping image/label data to make them compatible with VGG19 model input

In [None]:
# Stack the per-image tensors into a single (N, 224, 224, 3) batch. Using -1 lets
# NumPy infer N from the number of loaded images instead of requiring exactly 50.
data2 = np.array(data).reshape(-1, 224, 224, 3)
# Stack the per-image label vectors into one array, one row per image.
labels2 = np.array(labels)

Printing number of data points for reference

In [None]:
# Report how many images were collected in `data`.
print(f'No of data points: {len(data)}')

Training the modified model to classify the given dataset into 20 classes, with an 80/20 train-validation split

In [None]:
# EarlyStopping is not among the notebook's imports, so bring it into scope here.
from tensorflow.keras.callbacks import EarlyStopping

# Stop training when the validation loss stops decreasing.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)

# Train the new head for up to 100 epochs, holding out 20% of the samples for
# validation. Arguments left at their defaults are omitted; in particular
# `workers`, `use_multiprocessing` and `max_queue_size` are no longer accepted
# by `fit` in Keras 3, so passing them would make the call fail there.
model_hist = new_model.fit(
    x=data2,
    y=labels2,
    epochs=100,
    verbose=1,
    callbacks=[es],
    validation_split=0.2,
    shuffle=True,
)

Plotting training history to understand training of the model

In [None]:
# Plot training and validation loss per epoch to see how training progressed.
# matplotlib.pyplot is imported as `plt` in this notebook; the bare name
# `pyplot` is never defined.
plt.plot(model_hist.history['loss'], label='train')
# 'val_loss' is measured on the validation split, so label it accordingly.
plt.plot(model_hist.history['val_loss'], label='validation')
plt.legend()
plt.show()

Making predictions on the given image using the trained model

In [None]:
  # Classify one image with the fine-tuned model and print the best-scoring class.
  img = image.load_img('/content/drive/MyDrive/JPEGImages/xxxx.jpeg', target_size=(224, 224))
  plt.imshow(img)
  plt.axis('off')

  # Same preparation as for the training images: array -> batch axis -> VGG19 preprocessing.
  x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
  res_new = new_model.predict(x)

  # Map the index of the highest score back to its class name.
  predicted_idx = res_new.argmax()
  for i, class_idx in classes.items():
    if class_idx == predicted_idx:
      print(f'This looks like a {i}')