In [1]:
from warnings import filterwarnings
# NOTE(review): with no category or module filter this silences every warning
# raised later in the session, including deprecation warnings.
filterwarnings('ignore')

In [2]:
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import callbacks
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing import image
from keras.preprocessing.image import load_img, ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from keras.utils import np_utils
from keras.layers.convolutional import  MaxPooling2D
import numpy as np
import os
import pandas as pd
import splitfolders
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [3]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from keras import backend as K

# define a function to plot the result from training step
def show_result(history):
    """Print the final-epoch accuracies and plot the loss and accuracy curves.

    Args:
        history: object whose ``history`` attribute is a mapping with the keys
            'loss', 'val_loss', 'accuracy' and 'val_accuracy', each holding
            one value per epoch.
    """
    metrics = history.history

    # Report the figures from the last epoch.
    print('Last train accuracy: %s'%metrics['accuracy'][-1])
    print('Last validation accuracy: %s'%metrics['val_accuracy'][-1])

    # Pull every series up front so a missing key fails before any figure exists.
    curves = {
        key: (metrics[key], metrics['val_' + key])
        for key in ('loss', 'accuracy')
    }

    # Epochs are numbered from 1 on the x axis.
    epochs = range(1, len(curves['loss'][0]) + 1)

    # One row, two panels: loss on the left, accuracy on the right.
    _fig, axs = plt.subplots(1,2,figsize=(15,4))

    # (history key, y-axis label, word used in the legend entries)
    panels = (
        ('loss', 'Loss', 'loss'),
        ('accuracy', 'Accuracy', 'acc'),
    )
    for panel, (key, y_label, legend_word) in zip(axs, panels):
        train_values, val_values = curves[key]
        panel.plot(epochs, train_values, 'c--', label='Training %s' % legend_word)
        panel.plot(epochs, val_values, 'b', label='Validation %s' % legend_word)
        panel.set_title('Training and validation %s' % key)
        panel.set_xlabel('Epochs')
        panel.set_ylabel(y_label)
        panel.legend()
    
def predict_class(model, image_file, target_size=None):
    """Classify a single image file and display it with the predicted label.

    The image is resized, scaled to [0, 1] (the same 1/255 rescaling the data
    generators in this notebook apply) and passed through ``model`` as a batch
    of one.

    Args:
        model: trained Keras model whose output is one score per class
            (index 0 = cat, index 1 = dog).
        image_file: path of the image to classify.
        target_size: optional ``(height, width)`` to resize the image to.
            When omitted it is taken from ``model.input_shape``, so the image
            always matches what the network was built for.

    Returns:
        Array of shape ``(1,)`` holding the predicted class index.
    """
    if target_size is None:
        # assumes a channels-last image model: (batch, height, width, channels)
        shape = getattr(model, 'input_shape', None)
        if shape is not None and len(shape) == 4 and None not in shape[1:3]:
            target_size = tuple(shape[1:3])
        else:
            # Spatial size cannot be inferred; keep the historical default.
            target_size = (100, 100)

    pixels = image.img_to_array(image.load_img(image_file, target_size=target_size))
    pixels /= 255.0

    # The model expects a leading batch axis; a NumPy batch goes to predict(),
    # not to the generator-only predict_generator().
    batch = np.expand_dims(pixels, axis=0)
    predict = model.predict(batch).argmax(axis=1)

    # imshow needs the 3-D image, not the 4-D batch.
    plt.imshow(pixels)
    if predict[0] == 0:
        plt.xlabel('predict: cat')
    elif predict[0] == 1:
        plt.xlabel('predict: dog')
    plt.show()
    return predict

### 1. Load data

In [4]:
# Root folders of the training and test images; both end with a path separator.
train_dir, test_dir = './Cat_Dog_data2/train/', './Cat_Dog_data2/test/'

In [5]:
# Images are resized to a square of this many pixels per side; IMAGE_SIZE is
# the pair handed to the directory generators as target_size.
IMAGE_WIDTH = IMAGE_HEIGHT = 64
IMAGE_SIZE = IMAGE_WIDTH, IMAGE_HEIGHT

In [6]:
# Images per batch; passed as batch_size to every directory generator and used
# to derive the number of steps per epoch during training.
BATCH_SIZE = 32  

In [7]:
def _count_class_files(root_dir):
    """Return how many entries sit inside the class sub-folders of ``root_dir``.

    ``root_dir`` is expected to contain one sub-folder per class. Entries of
    ``root_dir`` that are not directories (stray files such as OS metadata)
    are skipped instead of raising when listed.
    """
    total = 0
    for class_name in os.listdir(root_dir):
        # os.path.join works whether or not root_dir ends with a separator.
        class_dir = os.path.join(root_dir, class_name)
        if os.path.isdir(class_dir):
            total += len(os.listdir(class_dir))
    return total


nb_train = _count_class_files(train_dir)
nb_test = _count_class_files(test_dir)

In [8]:
# Training count on the first line, test count on the second.
print(nb_train, nb_test, sep='\n')

22500
2500


### 2. Data preprocessing and data augmentation

In [9]:
# Shape of one image: the two spatial sizes from IMAGE_SIZE plus 3 colour channels.
input_shape = IMAGE_SIZE + (3,)

In [10]:
# Generator configuration for the training folder: pixel values are scaled by
# 1/255, random shear / zoom / horizontal flips are applied, and 20% of the
# files are set aside as the 'validation' subset.
train_datagen = ImageDataGenerator(
    rescale=1./255.0,
    validation_split=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
)

# Stream the 'training' subset as batches with one-hot ('categorical') labels.
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    subset='training',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    target_size=IMAGE_SIZE,
)

# Mapping from class folder name to label index.
print(train_generator.class_indices)

Found 18000 images belonging to 2 classes.
{'cat': 0, 'dog': 1}


In [11]:
# Validation images must only be rescaled: drawing them through the augmenting
# train_datagen would shear/zoom/flip them at random and make the validation
# metrics noisy and pessimistic. The subset split is decided from the sorted
# file listing and the split fraction, so a second generator with the same
# validation_split (0.2, matching train_datagen) selects the same held-out files.
validation_datagen = ImageDataGenerator(
    rescale=1./255.0,
    validation_split=0.2)

validation_generator = validation_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical', subset='validation')

# Mapping from class folder name to label index.
print(validation_generator.class_indices)

Found 4500 images belonging to 2 classes.
{'cat': 0, 'dog': 1}


In [12]:
# Distinct label indices and how many samples carry each one, per subset.
filename, label_count = np.unique(train_generator.classes, return_counts=True)
val_filename, val_label_count = np.unique(validation_generator.classes, return_counts=True)

# One item per line: heading, label indices, counts.
print('Training set', filename, label_count, sep='\n')
print('Validation set', val_filename, val_label_count, sep='\n')

Training set
[0 1]
[9000 9000]
Validation set
[0 1]
[2250 2250]


In [13]:
# Per-image shape of the first training batch (the leading batch axis is dropped).
train_generator[0][0].shape[1:]

(64, 64, 3)

In [14]:
# Test images are only rescaled by 1/255; no augmentation options are set.
test_datagen = ImageDataGenerator(rescale=1./255)

In [15]:
# Stream the test folder in directory order (shuffle disabled) so that batch
# outputs stay aligned with test_set.filenames and test_set.classes.
test_set = test_datagen.flow_from_directory(
    directory=test_dir,
    shuffle=False,
    batch_size=BATCH_SIZE,
    target_size=IMAGE_SIZE,
)

Found 2500 images belonging to 2 classes.


### 3. Build model (Transfer Learning)

In [16]:
from tensorflow.keras.applications import vgg16

# Convolutional base of VGG16 with ImageNet weights and without the original
# classifier head. The input shape comes from the notebook configuration
# (input_shape) rather than a second hard-coded literal, so the network always
# matches the size of the images the generators produce.
# NOTE(review): the generators feed 1/255-rescaled pixels, while the ImageNet
# weights were presumably trained with vgg16.preprocess_input - confirm this
# mismatch is intended.
vgg = vgg16.VGG16(include_top=False,
                  weights='imagenet',
                  input_shape=input_shape)

In [17]:
# Freeze the first 15 layers of the VGG16 base so their weights are not updated
# during training; the remaining layers stay trainable for fine-tuning.
for layer in vgg.layers[:15]:
    layer.trainable = False

In [18]:
# Stack a small classifier on top of the VGG16 base: flatten the feature maps,
# one 128-unit ReLU layer, 20% dropout, then a 2-way softmax output.
new_vgg = models.Sequential([
    vgg,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2, name='dropout_2'),
    layers.Dense(2, activation='softmax'),
])

new_vgg.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 2, 2, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               262272    
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 258       
Total params: 14,977,218
Trainable params: 7,341,954
Non-trainable params: 7,635,264
_________________________________________________________________


In [21]:
# SGD with momentum and a small step size. `learning_rate` is the supported
# argument name; the older `lr` alias is deprecated and rejected by newer
# Keras optimizers.
opt = optimizers.SGD(learning_rate=0.001, momentum=0.9)

# Two-unit softmax output with one-hot labels -> categorical cross-entropy.
new_vgg.compile(optimizer=opt,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

In [None]:
# Model.fit accepts the directory iterators directly; fit_generator is
# deprecated. len(generator) is the number of batches needed to cover every
# sample, so the final partial batch is no longer dropped the way
# `samples // BATCH_SIZE` dropped it.
history = new_vgg.fit(
    train_generator,
    epochs=25,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    verbose=1,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25

In [None]:
# Print the last-epoch accuracies and draw the loss / accuracy curves for the run above.
show_result(history)

In [None]:
# Rewind the iterator (reset must actually be called) so the predictions line
# up with test_set.filenames and test_set.classes.
test_set.reset()

# The model trained in this notebook is new_vgg; Model.predict accepts the
# iterator directly (predict_generator is deprecated).
ytesthat = new_vgg.predict(test_set)

df = pd.DataFrame({
    'filename': test_set.filenames,
    # Softmax score of class index 0 - a probability, not a label.
    'predict': ytesthat[:, 0],
    # True class index taken from the directory structure.
    'y': test_set.classes,
    # Most likely class index per image, directly comparable with 'y'.
    'predicted_class': ytesthat.argmax(axis=1),
})