#VGG16 image model

In [None]:
'''
data/
  train(train_data)/
    hateful/
      01235.png
      01247.png
      ...
    non-hateful/
      01236.png
      01245.png
      ...
  validation(dev_seen_data)/
    hateful/
      01456.png
      01726.png
      ...
    non-hateful/
      01268.png
      01472.png
      ...

'''

In [None]:
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing.image import ImageDataGenerator, array_to_img
from keras.models import Model, Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.optimizers import SGD
from keras import backend as K
import pandas as pd
import numpy as np

from keras.utils.vis_utils import plot_model
from IPython.display import display
import matplotlib.pyplot as plt

In [None]:
# Input resolution fed to the network (VGG16 with its ImageNet classifier head
# is built for 224x224 images).
img_width = 224
img_height = 224

In [None]:
# Number of images per batch produced by every data generator in this notebook.
batch_size = 16

# Image folders (one sub-directory per class) used for training and validation.
# Sample counts and the epoch count are set directly in the training call.
train_data_dir = "/content/drive/MyDrive/data/train_data"
validation_data_dir = "/content/drive/MyDrive/data/dev_seen_data"

In [None]:
# Image tensors are 3-D. Depending on the backend's data format the colour
# channels are either the first axis ('channels_first') or the last axis.
input_shape = (
  (3, img_width, img_height)
  if K.image_data_format() == 'channels_first'
  else (img_width, img_height, 3)
)

In [None]:
#Wrapper for preprocess_input() to make it compatible to use with ImageDataGenerator's preprocessing_function
def preprocess_vgg(x):
  """Run VGG16's `preprocess_input` on one image array.

  A leading axis of size 1 is added before the call and stripped from the
  result, so the returned array has the same rank as `x`.

  Args:
    x: image array; presumably a single rank-3 image, since this is passed
      as `preprocessing_function` to `ImageDataGenerator`.

  Returns:
    The first (and only) element of the preprocessed one-image batch.
  """
  batch_of_one = np.expand_dims(x, axis=0)
  return preprocess_input(batch_of_one)[0]

In [None]:
# Load the complete VGG16 network, including its fully connected head
# (the 'fc2' layer is reused below), initialised with ImageNet weights.
vgg16 = VGG16(include_top=True, weights='imagenet')

In [None]:
# Swap VGG16's original classifier for a 2-way softmax layer: tap the output
# of the 'fc2' layer and stack the new 'predictions' layer on top of it.
x = vgg16.get_layer(name='fc2').output
prediction = Dense(
  units=2,
  activation='softmax',
  name='predictions',
)(x)

# The new model shares VGG16's input and every layer up to 'fc2'.
img_model = Model(inputs=vgg16.input, outputs=prediction)

In [None]:
# Freeze every layer except the newly added 'predictions' layer, so that only
# that layer's weights are updated during fine-tuning.
for layer in img_model.layers:
  if layer.name != 'predictions':
    layer.trainable = False

In [None]:
# Tabulate each layer's name and trainable flag to check what was frozen.
df = pd.DataFrame(
  [(layer.name, layer.trainable) for layer in img_model.layers],
  columns=['layer', 'trainable'],
)

In [None]:
# Training pipeline: VGG preprocessing plus random augmentation
# (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
  preprocessing_function=preprocess_vgg,
  rotation_range=40,
  width_shift_range=0.2,
  height_shift_range=0.2,
  shear_range=0.2,
  zoom_range=0.2,
  horizontal_flip=True,
  fill_mode='nearest',
)

# Stream resized images from the class sub-folders with one-hot labels.
train_generator = train_datagen.flow_from_directory(
  directory=train_data_dir,
  target_size=(img_width, img_height),
  batch_size=batch_size,
  class_mode='categorical',
)

In [None]:
# Validation pipeline: VGG preprocessing only, no augmentation.
validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_vgg)

# Stream resized validation images from the class sub-folders with one-hot labels.
validation_generator = validation_datagen.flow_from_directory(
  directory=validation_data_dir,
  target_size=(img_width, img_height),
  batch_size=batch_size,
  class_mode='categorical',
)

In [None]:
# Compile with SGD and a small learning rate so fine-tuning changes the
# weights gently. `learning_rate` is the supported argument name; the old
# `lr` alias is deprecated and rejected by newer Keras releases.
sgd = SGD(learning_rate=1e-4, momentum=0.9)

# Two-class softmax output with one-hot labels -> categorical cross-entropy.
img_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Write a diagram of the model (with tensor shapes and layer names) to a PNG file.
plot_model(
  img_model,
  to_file='/content/VGG16_img_model.png',
  show_shapes=True,
  show_layer_names=True,
)

In [None]:
# Train the model. `Model.fit` accepts data generators directly;
# `fit_generator` is deprecated (this notebook already passes a generator
# straight to `predict`).
# NOTE(review): the step counts are derived from hard-coded sample counts
# (2000 training / 100 validation images), not from the generators' actual
# sizes -- confirm this cap is intended.
history = img_model.fit(
    train_generator,
    steps_per_epoch=2000 // batch_size,
    epochs=200,
    validation_data=validation_generator,
    validation_steps=100 // batch_size,
)

In [None]:
# Save only the trained weights; the architecture is written separately as JSON.
img_model.save_weights(
  filepath='/content/drive/MyDrive/data/vgg16_hateful_nonhateful_dense2.h5',
)

In [None]:
# Serialise the model architecture (no weights) to a JSON string ...
model_json_final = img_model.to_json()

# ... and store it next to the weights file.
with open("/content/drive/MyDrive/data/vgg16_hateful_nonhateful_dense2.json", mode="w") as json_file:
  json_file.write(model_json_final)

In [None]:
# Plot the per-epoch training and validation accuracy recorded during training.
for metric_key, curve_label in (('accuracy', 'train accuracy'),
                                ('val_accuracy', 'validation accuracy')):
  plt.plot(history.history[metric_key], label=curve_label)

plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training history')
plt.legend()
# Accuracy is a fraction, so pin the y-axis to the full [0, 1] range.
plt.ylim([0, 1]);

In [None]:
# Held-out evaluation images (one sub-directory per class).
test_data_dir = "/content/drive/MyDrive/data/dev_unseen_data"

# Evaluation pipeline: VGG preprocessing only, no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_vgg)

# shuffle=False is essential for evaluation: `flow_from_directory` shuffles by
# default, but `test_generator.classes` is always in directory order. Without
# this, predictions and true labels would not line up sample by sample.
test_generator = test_datagen.flow_from_directory(directory=test_data_dir,
                                                  target_size=[img_width, img_height],
                                                  batch_size=batch_size,
                                                  class_mode='categorical',
                                                  shuffle=False)

In [None]:
#Confusion Matrix and Classification Report
# These two helpers were used without ever being imported.
from sklearn.metrics import classification_report, confusion_matrix

# NOTE: `test_generator.classes` is in directory order, so the predictions
# below only align with it when the generator yields samples unshuffled
# (i.e. it was created with shuffle=False).
Y_pred = img_model.predict(test_generator)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(test_generator.classes, y_pred))
print('Classification Report')

# Class indices follow the alphanumeric order of the folder names, so
# 'hateful' is index 0 and 'non-hateful' is index 1. Build the display names
# from the generator's own mapping instead of hard-coding an order (the
# previous hard-coded list had the two labels swapped).
# Assumes the test folders are named like the documented train/validation
# layout; unknown folder names are shown as-is.
display_names = {'hateful': 'Offensive', 'non-hateful': 'Non-Offensive'}
classes_by_index = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
target_names = [display_names.get(class_name, class_name) for class_name in classes_by_index]
print(classification_report(test_generator.classes, y_pred, target_names=target_names))