In [2]:
import glob
import numpy as np
import cv2
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten, Lambda
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.layers import Conv2D, MaxPooling2D

SEED = 2017
# Seed NumPy's global RNG as well: the notebook draws random image indices and
# random class picks through np.random, which would otherwise differ on every
# kernel restart.
np.random.seed(SEED)

Using TensorFlow backend.


In [None]:
# Kaggle competition dataset
# Downloaded from https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/data

In [None]:
# Collect the image paths of both classes from the dataset folder.
DATA_DIR = '../Data/PetImages/'
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the path lists (and any seeded split built from them) reproducible.
cats = sorted(glob.glob(DATA_DIR + "Cat/*.jpg"))
dogs = sorted(glob.glob(DATA_DIR + "Dog/*.jpg"))

print('#Cats: {}, #Dogs: {}'.format(len(cats), len(dogs)))

In [None]:
# Hold out 20% of each class for validation. Each class gets its own call:
# a single train_test_split(dogs, cats, ...) call requires both lists to have
# the same number of items and raises ValueError when they differ.
dogs_train, dogs_val = train_test_split(dogs, test_size=0.2, random_state=SEED)
cats_train, cats_val = train_test_split(cats, test_size=0.2, random_state=SEED)

In [None]:
# Show a few randomly chosen training images: cats in the left column, dogs in
# the right column, one row per example.
n_examples = 3
plt.figure(figsize=(15, 15))
for row in range(n_examples):
    for col, paths in enumerate((cats_train, dogs_train)):
        img = cv2.imread(paths[np.random.randint(len(paths))])
        # The grid height follows n_examples instead of a hard-coded 3 rows.
        plt.subplot(n_examples, 2, row * 2 + col + 1)
        if img is None:
            # cv2.imread returns None for a file it cannot read; leave the
            # cell empty instead of crashing in cvtColor.
            plt.axis('off')
            plt.title('unreadable image')
            continue
        # OpenCV loads images as BGR, matplotlib expects RGB.
        _ = plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()

In [None]:
def _load_rgb_image(path, img_size):
    """Read `path` with OpenCV and return it as an img_size x img_size RGB array.

    Returns None when cv2.imread cannot read the file.
    """
    img = cv2.imread(path)
    if img is None:
        return None
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # The images have different dimensions, so every image is resized to the
    # same img_size x img_size square.
    return cv2.resize(img, (img_size, img_size), interpolation=cv2.INTER_AREA)


def batchgen(cats, dogs, batch_size, img_size=50, max_failures=1000):
    """Endlessly yield random (images, labels) batches of cats and dogs.

    Every sample is a coin flip between the two classes followed by a uniformly
    random pick from that class's path list. Label 1 marks a dog, 0 a cat.

    Args:
        cats: sequence of cat image paths.
        dogs: sequence of dog image paths.
        batch_size: number of samples in each yielded batch.
        img_size: every image is resized to img_size x img_size pixels.
        max_failures: number of consecutive unreadable files tolerated before
            the generator gives up.

    Yields:
        Tuple (batch_images, batch_label): a float array of shape
        (batch_size, img_size, img_size, 3) and a float array of shape
        (batch_size,).

    Raises:
        RuntimeError: if `max_failures` images in a row cannot be read.
    """
    while True:
        # Fresh arrays for every batch: a consumer still holding an earlier
        # batch must not see it overwritten in place by the next one.
        batch_images = np.zeros((batch_size, img_size, img_size, 3))
        batch_label = np.zeros(batch_size)

        n = 0
        failures = 0
        while n < batch_size:
            # Randomly pick a dog or cat image
            if np.random.randint(2) == 1:
                paths, y = dogs, 1
            else:
                paths, y = cats, 0
            img = _load_rgb_image(paths[np.random.randint(len(paths))], img_size)

            if img is None:
                # Unreadable file: draw another sample. Abandoning the batch
                # here would yield slots that were never filled.
                failures += 1
                if failures >= max_failures:
                    raise RuntimeError(
                        'batchgen: {} consecutive images could not be read'.format(failures))
                continue

            failures = 0
            batch_images[n] = img
            batch_label[n] = y
            n += 1
        yield batch_images, batch_label

In [None]:
def create_model(stride=1, padding='same', img_size=100):
    """Build and compile a small convolutional network with one sigmoid output.

    Args:
        stride: stride applied by every convolution layer.
        padding: padding mode passed to every convolution layer.
        img_size: the network takes img_size x img_size inputs with 3 channels.

    Returns:
        A compiled Sequential model using binary cross-entropy loss, the Adam
        optimizer (0.001) and the 'binary_accuracy' metric.
    """
    def conv(filters):
        # All convolutions share kernel size, activation, padding and stride.
        return Conv2D(filters, (3, 3), activation='relu', padding=padding, strides=stride)

    layers = [
        # Rescale inputs with x / 255 - 0.5 (maps 0..255 onto -0.5..0.5).
        Lambda(lambda x: (x / 255.) - 0.5, input_shape=(img_size, img_size, 3)),
        # First convolution block, followed by pooling and dropout.
        conv(32),
        conv(32),
        MaxPooling2D(pool_size=(2,2)),
        Dropout(0.5),
        # Second convolution block, followed by dropout only.
        conv(64),
        conv(64),
        Dropout(0.5),
        # Classification head.
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]

    network = Sequential()
    for layer in layers:
        network.add(layer)

    network.compile(loss='binary_crossentropy', optimizer=Adam(0.001), metrics=['binary_accuracy'])
    return network

In [None]:
# Build one model per stride/padding combination and record its parameter count.
img_size = 100

models = []
for stride in (1, 2):
    for padding in ('same', 'valid'):
        model = create_model(stride, padding, img_size)
        pars = model.count_params()
        models.append({
            'setting': '{}_{}'.format(stride, padding),
            'model': model,
            'parameters': pars,
        })

In [None]:
# Print the layer-by-layer summary of the first model in the list
# (the stride 1 / 'same' padding configuration).
models[0]['model'].summary()

In [None]:
# Early stopping watches the validation binary accuracy with a patience of
# 5 epochs.
callbacks = [EarlyStopping(monitor='val_binary_accuracy', patience=5)]

In [None]:
batch_size = 512
n_epochs = 500
# Number of batches that covers each split roughly once; never fewer than one
# step, so a small dataset does not round down to zero.
validation_steps = max(1, round((len(dogs_val)+len(cats_val))/batch_size))
steps_per_epoch = max(1, round((len(dogs_train)+len(cats_train))/batch_size))

# batchgen's signature is (cats, dogs, ...). Keyword arguments bind each class
# to the label the generator assigns to it (dog = 1, cat = 0); passing the dog
# list first positionally would invert the labels.
train_generator = batchgen(cats=cats_train, dogs=dogs_train, batch_size=batch_size, img_size=img_size)
val_generator = batchgen(cats=cats_val, dogs=dogs_val, batch_size=batch_size, img_size=img_size)

# Train every model configuration in turn and keep its History object, in the
# same order as `models`.
history = []
for entry in models:
    print(entry)
    history.append(
        entry['model'].fit_generator(
            train_generator, steps_per_epoch=steps_per_epoch, epochs=n_epochs,
            validation_data=val_generator, validation_steps=validation_steps,
            callbacks=callbacks
        )
    )

In [None]:
# Plot the validation-accuracy curve of every trained model and print each
# model's best value.
for i, spec in enumerate(models):
    val_acc = history[i].history['val_binary_accuracy']
    plt.plot(range(len(val_acc)), val_acc, label=spec['setting'])
    print('Max accuracy model {}: {}'.format(spec['setting'], max(val_acc)))
plt.title('Validation accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

In [None]:
# Plot the validation-accuracy curves again, this time also printing each
# model's parameter count next to its best validation accuracy.
for i, spec in enumerate(models):
    val_acc = history[i].history['val_binary_accuracy']
    plt.plot(range(len(val_acc)), val_acc, label=spec['setting'])
    print('Max accuracy model {}: {} (#parameters: {})'.format(spec['setting'], max(val_acc), spec['parameters']))
# Title, axis labels and legend so the figure is readable on its own.
plt.title('Validation accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()