In [None]:
'''
Unzip Commands:
!unzip ../input/dogs-vs-cats-redux-kernels-edition/test.zip
!unzip ../input/dogs-vs-cats-redux-kernels-edition/train.zip
'''

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
import random
import gc
import matplotlib.image as mpimg

#Dataset locations (the unzip commands at the top of this cell create them)
trainDir = './train'
testDir = './test'

#Fixed seed so the shuffled order is identical on every Restart & Run All
shuffleSeed = 2

#List the training directory once; sorting makes the selected subset
#independent of the arbitrary order in which os.listdir returns entries
trainFiles = sorted(os.listdir(trainDir))
trainDogs = [os.path.join(trainDir, name) for name in trainFiles if 'dog' in name]
trainCats = [os.path.join(trainDir, name) for name in trainFiles if 'cat' in name]

testImages = [os.path.join(testDir, name) for name in os.listdir(testDir)]

#Balanced subset: at most 2000 images of each class, then mixed together
trainImages = trainDogs[:2000] + trainCats[:2000]
#A private Random instance keeps the shuffle reproducible without reseeding the global generator
random.Random(shuffleSeed).shuffle(trainImages)

#The intermediate lists are not needed once the subset has been built
del trainFiles
del trainDogs
del trainCats
gc.collect()

def displayUnprocessedimages(imagePaths = None, count = 3):
    '''
    Show the first `count` raw (un-resized) images, one figure per image.

    imagePaths: sliceable sequence of image file paths. When None, the global
                trainImages list is used, which is what the function did
                before it accepted arguments.
    count:      number of images, taken from the start of the sequence, to show.
    '''
    if imagePaths is None:
        #Resolved at call time: raises NameError if trainImages has already been deleted
        imagePaths = trainImages

    for imagePath in imagePaths[:count]:
        plt.imshow(mpimg.imread(imagePath))
        plt.show()
    
#Size that images are resized to when loaded, and the number of colour channels (RGB)
nrows, ncolumns, channels = 150, 150, 3

def readAndProcess(imagesListInput):
    '''
    Load every image path, resize it to nrows x ncolumns pixels and convert it
    from OpenCV's BGR channel order to RGB.

    Returns a tuple (images, labels):
      images: list with one resized RGB array per input path.
      labels: list with 1 for file names containing 'dog' and 0 for file names
              containing 'cat'. File names matching neither get no label, so
              labels can be shorter than images.

    Raises ValueError when a path cannot be read as an image.
    '''
    imagesListOutput = []
    labelsListOutput = []

    for imagePath in imagesListInput:
        bgrImage = cv2.imread(imagePath, cv2.IMREAD_COLOR)
        if bgrImage is None:
            #cv2.imread reports an unreadable file by returning None rather than raising
            raise ValueError('Could not read image: {}'.format(imagePath))

        #cv2.resize takes dsize as (width, height), i.e. (columns, rows)
        resizedImage = cv2.resize(bgrImage, (ncolumns, nrows), interpolation = cv2.INTER_CUBIC)
        imagesListOutput.append(cv2.cvtColor(resizedImage, cv2.COLOR_BGR2RGB))

        #Look at the file name only, so a directory called e.g. 'dogs-vs-cats' cannot decide the label
        fileName = os.path.basename(imagePath)
        if 'dog' in fileName:
            labelsListOutput.append(1)
        elif 'cat' in fileName:
            labelsListOutput.append(0)

    return imagesListOutput, labelsListOutput

x, y = readAndProcess(trainImages)

#Preview the first processed images side by side
plt.figure(figsize = (20, 10))
columns = 5
#plt.subplot needs integer grid sizes; `5 / columns + 1` is a float in Python 3
previewRows = 5 // columns + 1

#Never index past the end of x when fewer than `columns` images were loaded
for i in range(min(columns, len(x))):
    plt.subplot(previewRows, columns, i + 1)
    plt.imshow(x[i])

In [None]:
#Convert the image and label lists to arrays and plot the number of labels per class
#(1 = dog, 0 = cat); with 2000 images of each class both bars should reach 2000

import seaborn as sns
del trainImages
gc.collect()

x = np.array(x)
y = np.array(y)

#Pass the labels by keyword: newer seaborn releases treat the first positional argument as `data`
sns.countplot(x = y)
plt.title('Labels for Cats & Dogs')

In [None]:
#Output array shape, for Keras model
#e.g. (4000, 150, 150, 3) = 4000 images, each 150x150 pixels, with 3 RGB channels
#Print the shape of the image array and of the label array, one per line
for shapeCaption, shapedArray in (("Shape of train images is:", x), ("Shape of labels is:", y)):
    print(shapeCaption, shapedArray.shape)

In [None]:
#Split data into train and validation sets (80% / 20%, fixed random_state for a repeatable split)
from sklearn.model_selection import train_test_split
xTrain, xVal, yTrain, yVal = train_test_split(x, y, test_size = 0.2, random_state = 2)

print ("Shape of train images is:", xTrain.shape)
print ("Shape of validation images is:", xVal.shape)
#Give the two label arrays distinct captions so the printed output is unambiguous
print ("Shape of train labels is:", yTrain.shape)
print ("Shape of validation labels is:", yVal.shape)

In [None]:
#The full arrays are no longer needed now that the train/validation split exists
del x, y
gc.collect()

#Number of samples in each split, and the batch size setting
nTrain, nVal = len(xTrain), len(xVal)
batchSize = 32

from keras import layers
from keras import models
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img

#Convolutional network for binary classification of the loaded images.
#Input shape is (height, width, channels), taken from the resize constants
#instead of repeating the literals 150, 150, 3.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation = 'relu', input_shape = (nrows, ncolumns, channels)))
model.add(layers.MaxPooling2D((2, 2)))
#Remaining feature-extraction stages: a 3x3 convolution followed by 2x2 max pooling each
for filters in (64, 128, 128):
    model.add(layers.Conv2D(filters, (3, 3), activation = 'relu'))
    model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(512, activation = 'relu'))
#Single sigmoid unit: one value in (0, 1) per image, matching the 0/1 labels (1 = dog)
model.add(layers.Dense(1, activation = 'sigmoid'))

model.summary()

In [None]:
#Binary cross-entropy pairs with the model's single sigmoid output.
#`learning_rate` is the current name of the optimizer argument; `lr` is a deprecated alias.
model.compile(loss = 'binary_crossentropy', optimizer = optimizers.RMSprop(learning_rate = 1e-4), metrics = ['acc'])

#Training generator: scale pixel values by 1/255 and apply random augmentation
#(rotation, shifts, shear, zoom, horizontal flip)
trainDatagen = ImageDataGenerator(rescale = 1./255,
                                 rotation_range = 40,
                                 width_shift_range = 0.2,
                                 height_shift_range = 0.2,
                                 shear_range = 0.2,
                                 zoom_range = 0.2,
                                 horizontal_flip = True)

#Validation generator: rescaling only, no augmentation
val_datagen = ImageDataGenerator(rescale = 1./255)