In [1]:
#importing all the required dependencies
import os
import shutil
import random
import matplotlib
import numpy as np
from PIL import Image
from numpy import expand_dims
from keras.preprocessing.image import img_to_array, ImageDataGenerator, load_img
import matplotlib.pyplot as plt
from keras.models import model_from_json
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Conv2D, Flatten, Dropout, MaxPooling2D
from keras.callbacks import ModelCheckpoint
from keras.applications.inception_v3 import InceptionV3, preprocess_input

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])



**importing all the hot dog images from dataset/hd directory**

In [17]:
def image_data_loader(path):
    image_array_list = []
    for image_name in os.listdir(path):
        if image_name.split('.')[1] != 'jpg':
            continue
        else:
            image = Image.open(path+'/'+image_name)
            image_arr = np.asarray(image)
            image_arr_extended = np.expand_dims(image_arr, 0)
            image_array_list.append((image_arr_extended, image_name))
    return image_array_list

In [18]:
hd_image_array_list = image_data_loader('dataset/hd')
print('number of hotdog images available : {}'.format(len(hd_image_array_list)))
nhd_image_array_list = image_data_loader('dataset/nhd')
print('number of not hotdog images available : {}'.format(len(nhd_image_array_list)))

number of hotdog images available : 410
number of not hotdog images available : 4011


**The dataset is highly imbalanced, the not hot dog images are 10 times of hot dog images.
So, we perform data augmentation to hot dog images to increase their number.
We rely on different image data augmentation techniques such as horizontal shift, vertical shift, horizontal and vertical flip, rotation, zoom.**

**Below is the class which has several methods and each method performs one type of augmentation.
All the images after each type of augmentation are stored in hd_augmented directory**

In [19]:
class DataAug:
    @staticmethod
    def horizontal_shifter(image_array_list, variations_per_image):
        print('in shifter')
        for image_array in image_array_list:
            for i in range(variations_per_image):
                random_shift_fraction = random.uniform(0.2, 0.5)
                shift_array = [-random_shift_fraction,random_shift_fraction]
                horizontal_shift_datagen = ImageDataGenerator(width_shift_range=shift_array)
                image_iterator = horizontal_shift_datagen.flow(image_array[0], batch_size=1)
                image_to_save = Image.fromarray(image_iterator[0][0].astype('uint8'), 'RGB')
                image_to_save = image_to_save.resize((300, 300))
                image_to_save.save('hd_augmented/'+'horizontal_shift'+str(i)+'_'+image_array[1])
    @staticmethod
    def vertical_shifter(image_array_list, variations_per_image):
        for image_array in image_array_list:
            for i in range(variations_per_image):
                random_shift_fraction = random.uniform(0.2, 0.5)
                vertical_shift_datagen = ImageDataGenerator(height_shift_range=0.5)
                image_iterator = vertical_shift_datagen.flow(image_array[0], batch_size=1)
                image_to_save = Image.fromarray(image_iterator[0][0].astype('uint8'), 'RGB')
                image_to_save = image_to_save.resize((300, 300))
                image_to_save.save('hd_augmented/'+'vertical_shift'+str(i)+'_'+image_array[1])
    @staticmethod
    def horizontal_flipper(image_array_list, variations_per_image):
        for image_array in image_array_list:
            for i in range(variations_per_image):
                horizontal_flip_datagen = ImageDataGenerator(horizontal_flip=True)
                image_iterator = horizontal_flip_datagen.flow(image_array[0], batch_size=1)
                image_to_save = Image.fromarray(image_iterator[0][0].astype('uint8'), 'RGB')
                image_to_save = image_to_save.resize((300, 300))
                image_to_save.save('hd_augmented/'+'horizontal_flip'+str(i)+'_'+image_array[1])
    @staticmethod
    def vertical_flipper(image_array_list, variations_per_image):
        for image_array in image_array_list:
            for i in range(variations_per_image):
                vertical_flip_datagen = ImageDataGenerator(vertical_flip=True)
                image_iterator = vertical_flip_datagen.flow(image_array[0], batch_size=1)
                image_to_save = Image.fromarray(image_iterator[0][0].astype('uint8'), 'RGB')
                image_to_save = image_to_save.resize((300, 300))
                image_to_save.save('hd_augmented/'+'vertical_flip'+str(i)+'_'+image_array[1])
    @staticmethod
    def image_rotator(image_array_list, variations_per_image, angles_list):
        for image_array in image_array_list:
            for i in range(variations_per_image):
                for angle in angles_list:
                    rotation_datagen = ImageDataGenerator(rotation_range=angle)
                    image_iterator = rotation_datagen.flow(image_array[0], batch_size=1)
                    image_to_save = Image.fromarray(image_iterator[0][0].astype('uint8'), 'RGB')
                    image_to_save = image_to_save.resize((300, 300))
                    image_to_save.save('hd_augmented/'+'rot'+str(i)+str(angle)+'_'+image_array[1])
    @staticmethod
    def image_zoomer(image_array_list, variations_per_image, zoom_list):
        for image_array in image_array_list:
            for i in range(variations_per_image):
                for zoom_val in zoom_list:
                    zoom_datagen = ImageDataGenerator(zoom_range=zoom_list)
                    image_iterator = zoom_datagen.flow(image_array[0], batch_size=1)
                    image_to_save = Image.fromarray(image_iterator[0][0].astype('uint8'), 'RGB')
                    image_to_save = image_to_save.resize((300, 300))
                    image_to_save.save('hd_augmented/'+'zoom'+str(i)+str(zoom_val)+'_'+image_array[1])
    

In [20]:
dataaug = DataAug()
dataaug.horizontal_shifter(hd_image_array_list, 2)

in shifter


In [21]:
dataaug.vertical_shifter(hd_image_array_list, 2)

In [22]:
dataaug.horizontal_flipper(hd_image_array_list, 1)

In [23]:
dataaug.vertical_flipper(hd_image_array_list, 1)

In [24]:
dataaug.image_rotator(hd_image_array_list, 1, [60, 135])

In [25]:
dataaug.image_zoomer(hd_image_array_list, 1,[0.6, 1.5])

In [27]:
#also, resizing the original images in dataset/hd diretory to (300, 300)
# and saving them in hg_augmented directory
for image_name in os.listdir('dataset/hd'):
    if image_name.split('.')[-1] != 'jpg':
        continue
    else:
        image = Image.open('dataset/hd/'+image_name)
        image_resized = image.resize((300, 300))
        image_resized.save('hd_augmented/'+image_name)

In [29]:
print('number of augmented hot dog images = {}'.format(len(os.listdir('hd_augmented'))))

number of augmented hot dog images = 4510


**We've successfully brought the image dataset of hot dog images to 4510, which is balanced with the number of not hot dog images.
Now the dataset is approximately balanced and we can proceed with building the model**

**Splitting the data into train and test sets with a 0.8 ratio.
Also, we are organizing the train and test data in a final_dataset directory for better organization
Here is the structure of the directory:**

final_dataset

    --train
        --hd
        --nhd
    --test
        --hd
        --nhd

In [30]:
split = 0.8

In [31]:
hd_im_list = os.listdir('hd_augmented')
train_hd_im_list = random.sample(hd_im_list, int(split*len(hd_im_list)))
print('number of train hotdog images: {} \nnumber of test hotdog images: {}'
         .format(len(train_hd_im_list), (len(hd_im_list)-len(train_hd_im_list))))

number of train hotdog images: 3608 
number of test hotdog images: 902


In [None]:
for image_name in hd_im_list:
    if image_name in train_hd_im_list:
        shutil.copyfile('hd_augmented/'+image_name, 'final_dataset/train/hd/'+image_name)
    else:
        shutil.copyfile('hd_augmented/'+image_name, 'final_dataset/test/hd/'+image_name)

In [35]:
print(len(os.listdir('final_dataset/train/hd/')))

3608


In [36]:
nhd_im_list = os.listdir('dataset/nhd')
train_nhd_im_list = random.sample(nhd_im_list, int(split*len(nhd_im_list)))
print('number of train not hotdog images: {} \nnumber of test not hotdog images: {}'
         .format(len(train_nhd_im_list), (len(nhd_im_list)-len(train_nhd_im_list))))

number of train not hotdog images: 3208 
number of test not hotdog images: 803


In [37]:
for image_name in nhd_im_list:
    if image_name in train_nhd_im_list:
        shutil.copyfile('dataset/nhd/'+image_name, 'final_dataset/train/nhd/'+image_name)
    else:
        shutil.copyfile('dataset/nhd/'+image_name, 'final_dataset/test/nhd/'+image_name)

In [39]:
print(len(os.listdir('final_dataset/test/nhd/')))

803


**Using Keras ImageDataGenerator class, we are creating image data iterators**

In [40]:
datagen = ImageDataGenerator(rescale=1. / 255)

In [41]:
train_iterator = datagen.flow_from_directory('final_dataset/train', batch_size=32, 
                                             target_size=(300, 300),class_mode='binary', shuffle=False)
test_iterator = datagen.flow_from_directory('final_dataset/test', batch_size=32, 
                                            target_size=(300, 300),class_mode='binary', shuffle=False)

Found 6816 images belonging to 2 classes.
Found 1705 images belonging to 2 classes.


In [42]:
train_iterator.class_indices

{'hd': 0, 'nhd': 1}

In [43]:
train_steps = train_iterator.n // train_iterator.batch_size
test_steps = test_iterator.n // test_iterator.batch_size

**Using transfer learning, extracting image features(convoluted and pooled features) from InceptionV3 CNN network.** 

In [44]:
model = InceptionV3(weights='imagenet', include_top=False, input_shape=(300, 300, 3))

**Extracting the features and saving them for further use**

In [45]:
pre_trained_train_features = model.predict_generator(train_iterator, train_steps, verbose=1)
print(pre_trained_train_features.shape)
np.savez('pre_trained_train_features', features=pre_trained_train_features)

(6816, 8, 8, 2048)


In [46]:
len(os.listdir('hd_augmented'))

4510

In [47]:
pre_trained_test_features = model.predict_generator(test_iterator, test_steps, verbose=1)
print(pre_trained_test_features.shape)
np.savez('pre_trained_test_features', features=pre_trained_test_features)

(1696, 8, 8, 2048)


In [48]:
if os.path.exists('pre_trained_train_features.npz'):
    train_data = np.load('pre_trained_train_features.npz')['features']
else:
    train_data = pre_trained_train_features
if os.path.exists('pre_trained_test_features.npz'):
    test_data = np.load('pre_trained_test_features.npz')['features']
else:
    test_data = pre_trained_test_features

In [26]:
os.path.exists('pre_trained_test_features.npz')

True

In [51]:
train_data.shape, train_labels.shape

((6816, 8, 8, 2048), (6816,))

**Creating the label vectors for train and test data**

In [50]:
# number of train examples = 6816, so an array of labels of that size is create
train_labels = np.array([0] * 3608 + [1] * 3208)
# number of test examples = 1705, so witha batch size of 32, we get 1696 examples(last 9 are ignored) with 53 examples per batch
test_labels = np.array([0] * 902 + [1] * 794)

**Defining and building a custom CNN model.
This model is trained on the pre computed image features from InceptionV3 model.**

In [52]:
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=train_data.shape[1:], padding='same'))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.6))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))



**Creating a model checkpoint so that the model weights are to be saved when the validation loss is lowest**

In [53]:
checkpointer = ModelCheckpoint(filepath='custom_model_checkpoint.hdf5', verbose=1, save_best_only=True)

In [54]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])

history = model.fit(train_data, train_labels, epochs=50, batch_size=32,
          validation_split=0.3,
          verbose=2,
          callbacks=[checkpointer], shuffle=True)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 4771 samples, validate on 2045 samples
Epoch 1/50
 - 17s - loss: 0.3003 - binary_accuracy: 0.8818 - val_loss: 0.2671 - val_binary_accuracy: 0.9589

Epoch 00001: val_loss improved from inf to 0.26711, saving model to custom_model_checkpoint.hdf5
Epoch 2/50
 - 16s - loss: 0.1652 - binary_accuracy: 0.9363 - val_loss: 0.1517 - val_binary_accuracy: 0.9565

Epoch 00002: val_loss improved from 0.26711 to 0.15170, saving model to custom_model_checkpoint.hdf5
Epoch 3/50
 - 16s - loss: 0.1111 - binary_accuracy: 0.9652 - val_loss: 0.2503 - val_binary_accuracy: 0.8782

Epoch 00003: val_loss did not improve from 0.15170
Epoch 4/50
 - 16s - loss: 0.1046 - binary_accuracy: 0.9667 - val_loss: 0.4412 - val_binary_accuracy: 0.8254

Epoch 00004: val_loss did not improve from 0.15170
Epoch 5/50
 - 16s - loss: 0.0771 - binary_accuracy: 0.9740 - val_loss: 0.3319 - val_binary_accuracy: 0.8709

Epoch 00005: 

**Getting the predictions on test data.
The model predicts 0 for a hot dog image and 1 for not hot dog iamge.**

In [56]:
predictions = model.predict_classes(test_data, batch_size=32, verbose=1)
len(predictions)



1696

**Saving the model architecture into JSON file**

In [57]:
model_json = model.to_json()
with open("custom_model.json", "w") as json_file:
    json_file.write(model_json)

**Calculating the confusion matrix and calculating the metrics from it**

In [58]:
from sklearn.metrics import confusion_matrix
conf_mat = confusion_matrix(test_labels, predictions)
print(conf_mat)

[[879  23]
 [ 72 722]]


In [59]:
#metrics for not hod dog images
precision_nhd = conf_mat[1][1]/(conf_mat[1][1]+conf_mat[1][0])
recall_nhd = conf_mat[0][0]/(conf_mat[0][0]+conf_mat[1][0])
f1_score_nhd = 2*(precision_nhd*recall_nhd)/(precision_nhd+recall_nhd)
print('for not hotdog images')
print('precision: {}\nrecall: {}\nf1_score: {}'.format(precision_nhd, recall_nhd, f1_score_nhd))

for not hotdog images
precision: 0.9093198992443325
recall: 0.9242902208201893
f1_score: 0.9167439482054793


In [60]:
#metrics for hot dog images
precision_hd = conf_mat[0][0]/(conf_mat[0][0]+conf_mat[0][1])
recall_hd = conf_mat[1][1]/(conf_mat[1][1]+conf_mat[0][1])
f1_score_hd = 2*(precision_nhd*recall_nhd)/(precision_nhd+recall_nhd)
print('for hot dog images:')
print('precision: {}\nrecall: {}\nf1_score: {}'.format(precision_hd, recall_hd, f1_score_hd))

for hot dog images:
precision: 0.9745011086474501
recall: 0.9691275167785235
f1_score: 0.9167439482054793


In [61]:
test_accuracy = (conf_mat[0][0]+conf_mat[1][1])/(conf_mat[0][0]+conf_mat[0][1]+conf_mat[1][0]+conf_mat[1][1])
print('overall test accuracy: {}'.format(test_accuracy))

overall test accuracy: 0.9439858490566038


**Function to run inference.
All the images for inference must be placed in inference/images_infer directory**

In [2]:
def inference(path_to_directory):
    datagen = ImageDataGenerator(rescale=1. / 255)
    test_it = datagen.flow_from_directory(path_to_directory, batch_size=32, 
                                            target_size=(300, 300),class_mode='binary', shuffle=False)
    pre_tr_model_test = InceptionV3(weights='imagenet', include_top=False, input_shape=(300, 300, 3))
    test_im_features = pre_tr_model_test.predict_generator(test_it, verbose=1)
    with open('custom_model.json', 'r') as json_file:
        loaded_model = model_from_json(json_file.read())
    loaded_model.load_weights('custom_model_checkpoint.hdf5')
    predictions = loaded_model.predict_classes(test_im_features, batch_size=32, verbose=1)
    return predictions

In [3]:
#call the inference function and pass the path as argument
inference_predictions = inference('inference/')
inference_labels = []
for i in inference_predictions:
    if i[0] == 0:
        inference_labels.append('hotdog')
    else:
        inference_labels.append('not hotdog')
images_list = os.listdir('inference/images_infer')
for i in range(len(images_list)):
    print('name: {}, prediction: {}\n'.format(images_list[i], inference_labels[i]))

Found 4 images belonging to 1 classes.
Instructions for updating:
Colocations handled automatically by placer.
name: best-hot-dog-fp.jpg, prediction: hotdog

name: dog.jpeg, prediction: not hotdog

name: images.jpeg, prediction: hotdog

name: plane.jpeg, prediction: not hotdog



Failures:
Initially, I tried a custom CNN model without InceptionV3 transfer learning and it resulted in poor accuracy of 73%.

Scope:
Can also implement localization(i.e. where in the image is the hot dog) and also would try training with batch size=1 to see how the training time vs accuracy trade off is(batch_size=1 trains faster as it doesn't perform gradient descent and update weights for every batch.)