In [48]:
from keras.applications import VGG19, inception_v3
from keras import models, layers, optimizers, regularizers
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

In [None]:
# !unzip data/hot-dog-not-hot-dog.zip -d data/ # sample code

## Importing the Data and Preprocessing 

In [29]:
# Plain (un-augmented) generators: the only transform is multiplying pixel values by 1/255.
# Each batch_size is set to the image count of its directory, so one next() call
# returns the entire split as a single array.
plain_flow_kwargs = dict(target_size = (224, 224), seed = 123)

data_te = ImageDataGenerator(rescale = 1./255).flow_from_directory(
    'DATASET/TEST',
    batch_size = 2513,
    **plain_flow_kwargs)

data_tr = ImageDataGenerator(rescale = 1./255).flow_from_directory(
    'DATASET/TRAIN',
    batch_size = 22564,
    **plain_flow_kwargs)

Found 2513 images belonging to 2 classes.
Found 22564 images belonging to 2 classes.


In [30]:
# Augmented generators for both directories. TEST and TRAIN share exactly the same
# random-transform settings; only the source directory and the batch size differ.
augmentation = dict(rescale = 1./255,               # multiply pixel values by 1/255
                    rotation_range = 360,           # rotation bound, in degrees
                    width_shift_range = 0.3,
                    height_shift_range = 0.3,
                    brightness_range = [0.2, 1.0],
                    horizontal_flip = True,
                    vertical_flip = True,
                    zoom_range = [0.5, 1.5])
aug_flow_kwargs = dict(target_size = (224, 224), seed = 123)

# One batch of 1000 augmented TEST images per next() call.
mod_te = ImageDataGenerator(**augmentation).flow_from_directory(
    'DATASET/TEST',
    batch_size = 1000,
    **aug_flow_kwargs)

# One batch of 5000 augmented TRAIN images per next() call.
mod_tr = ImageDataGenerator(**augmentation).flow_from_directory(
    'DATASET/TRAIN',
    batch_size = 5000,
    **aug_flow_kwargs)

Found 2513 images belonging to 2 classes.
Found 22564 images belonging to 2 classes.


In [4]:
# Draw one batch from each plain generator and unpack it into (images, labels).
# With batch_size equal to the directory size, this materialises the whole split in memory.
images_te, labels_te = next(data_te)
images_tr, labels_tr = next(data_tr)

In [31]:
# Draw one batch of randomly augmented images (and their labels) from each augmented generator.
images_mod_te, labels_mod_te = next(mod_te)
images_mod_tr, labels_mod_tr = next(mod_tr)

In [6]:
# Shapes of the plain image arrays: test first, then train, one per line.
print(images_te.shape, images_tr.shape, sep = '\n')

(2513, 224, 224, 3)
(22564, 224, 224, 3)


In [32]:
# Shapes of the augmented image arrays: test first, then train, one per line.
print(images_mod_te.shape, images_mod_tr.shape, sep = '\n')

(32, 64, 64, 3)
(32, 64, 64, 3)


In [8]:
# Shapes of the plain label arrays (one column per class): test first, then train.
print(labels_te.shape, labels_tr.shape, sep = '\n')

(2513, 2)
(22564, 2)


In [5]:
# Shapes of the augmented label arrays (one column per class): test first, then train.
print(labels_mod_te.shape, labels_mod_tr.shape, sep = '\n')

(625, 2)
(1575, 2)


In [6]:
# Pool every image batch along the sample axis, in this order:
# plain train, plain test, augmented train, augmented test.
# NOTE(review): the augmented batches are drawn from the same directories as the plain
# ones, and the pooled array is split at random afterwards, so an image and its augmented
# copy can land in different splits -- confirm this is acceptable for the evaluation.
images = np.concatenate([images_tr, images_te, images_mod_tr, images_mod_te], axis = 0)
# Augmented-only alternative:
# images = np.concatenate((images_mod_tr, images_mod_te))

In [7]:
# Sanity check: the first dimension should equal the total number of pooled images.
images.shape

(2200, 224, 224, 3)

In [8]:
# Collapse each two-column label array to a single binary vector by keeping column 0 only,
# then pool the vectors in the same order as the images. A label of 1 therefore marks the
# class stored in the first label column.
labels = np.concatenate([one_hot[:, 0]
                         for one_hot in (labels_tr, labels_te, labels_mod_tr, labels_mod_te)])
# Augmented-only alternative:
# labels = np.concatenate((labels_mod_tr[:,0], labels_mod_te[:, 0]))

In [9]:
# Sanity check: labels must be 1-D with one entry per pooled image.
labels.shape

(2200,)

In [None]:
# Visual spot check: display the pooled image at index 2 and print its binary label.
plt.imshow(images[2])
plt.show()
print(labels[2])

In [24]:
# Hold out 20% of the pooled data as the test set; the remaining 80% is split again below.
# random_state fixes the shuffle so the split is reproducible.
X_model, X_test, y_model, y_test = train_test_split(images, labels, test_size = 0.2, random_state = 123)

In [25]:
# Split the non-test portion 80/20 into training and validation sets
# (i.e. roughly 64% / 16% of the full pooled data).
X_train, X_val, y_train, y_val = train_test_split(X_model, y_model, test_size = 0.2, random_state = 123)

In [26]:
# Number of samples in the train, validation and test splits (in that order),
# each followed by a blank line.
for split in (X_train, X_val, X_test):
    print(len(split), '\n')

1408 

352 

440 



In [27]:
# Fraction of samples labelled 1 in each split, as a quick class-balance check.
# The denominator comes from the array itself rather than a hard-coded count, so the
# proportions stay correct whenever the dataset size or the split ratios change.
print(sum(y_train)/len(y_train))
print(sum(y_val)/len(y_val))
print(sum(y_test)/len(y_test))

0.5355113636363636
0.5568181818181818
0.55


## Building CNN Models 

### VGG19 

In [52]:
# Transfer-learning classifier: an ImageNet-pretrained VGG19 convolutional base (no
# classification top) followed by a dense head that ends in one sigmoid unit for the
# binary label.
# NOTE(review): the images are only rescaled by 1/255 upstream, while Keras ships a
# dedicated keras.applications.vgg19.preprocess_input for these weights -- confirm the
# input scaling is intended.
cnn_vgg_base = VGG19(weights = 'imagenet', include_top = False, input_shape = (224, 224, 3))

model_1 = models.Sequential([
    cnn_vgg_base,
    layers.Flatten(),
    # Three fully connected stages, each followed by 20% dropout with a fixed seed.
    layers.Dense(1024, activation='relu', kernel_initializer = 'glorot_uniform'),
    layers.Dropout(0.2, seed = 123),
    layers.Dense(1024, activation='relu', kernel_initializer = 'glorot_uniform'),
    layers.Dropout(0.2, seed = 123),
    layers.Dense(512, activation='relu', kernel_initializer = 'glorot_uniform'),
    layers.Dropout(0.2, seed = 123),
    # Single probability output for the binary label.
    layers.Dense(1, activation='sigmoid'),
])

In [16]:
# List every top-level layer of model_1 with its trainable flag
# (state before the pretrained base is frozen).
for layer in model_1.layers:
    print(layer.name, layer.trainable)

vgg19 True
flatten_1 True
dense_1 True
dense_2 True
dense_3 True
dense_4 True


In [13]:
# Freeze the pretrained VGG19 base so only the dense head is trained.
# This must run before compile() for the setting to take effect.
cnn_vgg_base.trainable = False

In [18]:
# Re-list the layers to confirm the freeze: the VGG19 base should now report False.
for layer in model_1.layers:
    print(layer.name, layer.trainable)

vgg19 False
flatten_1 True
dense_1 True
dense_2 True
dense_3 True
dense_4 True


In [14]:
# Configure training: binary cross-entropy loss (matches the single sigmoid output),
# Adam with its default settings, and accuracy reported under the name 'acc'.
model_1.compile(loss = 'binary_crossentropy', optimizer = optimizers.Adam(), metrics = ['acc'])

In [46]:
# Train model_1 for 100 epochs in mini-batches of 16 samples, scoring the
# validation split alongside the training data.
model_1.fit(x = X_train,
            y = y_train,
            validation_data = (X_val, y_val),
            epochs = 100,
            batch_size = 16)

In [None]:
# Persist the trained VGG19-based model to disk.
model_1.save('vgg19.h5')
# To restore it later, assign the result of the call below to a variable:
# models.load_model('vgg19.h5')

In [None]:
# Score model_1 on the held-out test split using the loss and metric set in compile().
model_1.evaluate(X_test, y_test)

In [None]:
# Hard 0/1 predictions for the test split.
# Sequential.predict_classes has been removed from Keras, so the sigmoid probabilities
# are thresholded explicitly. This reproduces what predict_classes returned for a
# single-unit sigmoid output: an int32 array of 0/1 values with shape (n_samples, 1).
mod_1_test_pred = (model_1.predict(X_test) > 0.5).astype('int32')

In [None]:
# Test-set confusion matrix (rows: true label, columns: predicted label)
# followed by per-class precision / recall / F1 for model_1.
print(confusion_matrix(y_test, mod_1_test_pred))
print(classification_report(y_test, mod_1_test_pred))

In [None]:
# Annotated confusion-matrix heatmap for model_1 (yellowbrick doesn't work with keras).
# Built only from names this notebook already imports (sklearn, numpy, matplotlib);
# the previous version relied on tf, pd, sns and `classes`, none of which were defined.

# The binary labels are column 0 of the generator's one-hot vectors, so label 1 is the
# class with generator index 0 and label 0 is the class with generator index 1.
index_to_class = {index: name for name, index in data_tr.class_indices.items()}
classes = [index_to_class[1], index_to_class[0]]  # tick names for labels 0 and 1

# Flatten the predictions so a (n_samples, 1) column vector is accepted as well.
con_mat_1 = confusion_matrix(y_test, np.ravel(mod_1_test_pred), labels = [0, 1])

figure, ax = plt.subplots(figsize = (8, 8))
heatmap = ax.imshow(con_mat_1, cmap = plt.cm.Blues)
figure.colorbar(heatmap, ax = ax)

ax.set_xticks(range(len(classes)))
ax.set_xticklabels(classes)
ax.set_yticks(range(len(classes)))
ax.set_yticklabels(classes)

# Write each count inside its cell; switch to white text on the darker cells.
contrast_threshold = con_mat_1.max() / 2
for (row, col), count in np.ndenumerate(con_mat_1):
    ax.text(col, row, str(count),
            ha = 'center', va = 'center',
            color = 'white' if count > contrast_threshold else 'black')

ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')
figure.tight_layout()
plt.show()
# figure.savefig('vgg19_conf_mat.png')

### InceptionV3 

In [51]:
# Second transfer-learning classifier: an ImageNet-pretrained InceptionV3 base (no
# classification top) followed by a smaller dense head ending in one sigmoid unit.
# NOTE(review): the images are only rescaled by 1/255 upstream, while Keras ships a
# dedicated keras.applications.inception_v3.preprocess_input for these weights --
# confirm the input scaling is intended.
cnn_incv3_base = inception_v3.InceptionV3(weights = 'imagenet', include_top = False, input_shape = (224, 224, 3))

model_2 = models.Sequential([
    cnn_incv3_base,
    layers.Flatten(),
    # Dropout is applied to the flattened features as well as after each hidden layer.
    layers.Dropout(0.2, seed = 123),
    layers.Dense(1024, activation='relu', kernel_initializer = 'glorot_uniform'),
    layers.Dropout(0.2, seed = 123),
    layers.Dense(512, activation='relu', kernel_initializer = 'glorot_uniform'),
    layers.Dropout(0.2, seed = 123),
    # Single probability output for the binary label.
    layers.Dense(1, activation='sigmoid'),
])

In [43]:
# Freeze the pretrained InceptionV3 base so only the dense head is trained.
# This must run before compile() for the setting to take effect.
cnn_incv3_base.trainable = False

In [45]:
# Configure training: binary cross-entropy loss (matches the single sigmoid output),
# Adam with its default settings, and accuracy reported under the name 'acc'.
model_2.compile(loss = 'binary_crossentropy', optimizer = optimizers.Adam(), metrics = ['acc'])

In [None]:
# Train model_2 for 100 epochs in mini-batches of 16 samples, scoring the
# validation split alongside the training data.
model_2.fit(x = X_train,
            y = y_train,
            validation_data = (X_val, y_val),
            epochs = 100,
            batch_size = 16)

In [None]:
# Persist the trained InceptionV3-based model to disk.
model_2.save('incv3.h5')
# To restore it later, assign the result of the call below to a variable:
# models.load_model('incv3.h5')

In [None]:
# Score model_2 on the held-out test split using the loss and metric set in compile().
model_2.evaluate(X_test, y_test)

In [None]:
# Hard predictions for the test split: round each sigmoid probability to the nearest
# of 0.0 / 1.0. The result stays a float array with the shape predict() returns.
mod_2_pred = np.around(model_2.predict(X_test))

In [None]:
# Test-set confusion matrix (rows: true label, columns: predicted label)
# followed by per-class precision / recall / F1 for model_2.
print(confusion_matrix(y_test, mod_2_pred))
print(classification_report(y_test, mod_2_pred))