In [120]:
import numpy as np
import matplotlib.pyplot as plt
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

from PIL import Image
from keras import preprocessing
from keras.utils import np_utils

In [121]:
# Root folder of the cat/dog image dataset, given relative to the notebook's
# working directory. Later cells read its training_set/ and test_set/ subfolders.
data_dir = "../datasets/cat_dog_data/"

In [122]:
import os
# Remember the working directory; relative paths such as data_dir resolve
# against it. (Changing into the directory we are already in would be a no-op,
# so no os.chdir call is needed.)
cwd = os.getcwd()

# Show what the dataset folder contains. os.listdir returns entries in
# arbitrary order, so sort them to make the output reproducible.
print(sorted(os.listdir(data_dir)))

['.DS_Store', 'new_set', 'test_set', 'training_set']


In [123]:
def list_jpg_files(folder):
    """Return the full paths of the ``.jpg`` files directly inside ``folder``.

    Parameters
    ----------
    folder : str
        Directory to scan (not recursive).

    Returns
    -------
    list of str
        ``os.path.join(folder, name)`` for every entry whose name ends with
        ``.jpg`` (case-sensitive), sorted by name. Sorting matters because
        ``os.listdir`` returns entries in arbitrary order, which would make
        the sample order differ between machines and runs.
    """
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.endswith('.jpg')  # test the extension, not a substring anywhere in the name
    )


# Collect the training files for cats and dogs
train_path_cats = os.path.join(data_dir, "training_set", "cats")
train_cats_files = list_jpg_files(train_path_cats)

train_path_dogs = os.path.join(data_dir, "training_set", "dogs")
train_dogs_files = list_jpg_files(train_path_dogs)

print("length of training file for cats: ", len(train_cats_files)) 
print('length of training file for dogs: ', len(train_dogs_files))

# Collect the test files for cats and dogs
test_path_cats = os.path.join(data_dir, "test_set", "cats")
test_cats_files = list_jpg_files(test_path_cats)

test_path_dogs = os.path.join(data_dir, "test_set", "dogs")
test_dogs_files = list_jpg_files(test_path_dogs)

print("length of test file for cats: ", len(test_cats_files)) 
print("length of test file for dogs: ", len(test_dogs_files))

length of training file for cats:  4000
length of training file for dogs:  4000
length of test file for cats:  1000
length of test file for dogs:  1000


In [124]:
d = 32  # image side length in pixels: every image is resized to d x d


def load_images(cat_files, dog_files, size):
    """Load cat images followed by dog images into a single float32 array.

    Parameters
    ----------
    cat_files, dog_files : sequence of str
        Paths of the image files for each class.
    size : int
        Every image is resized to ``size`` x ``size`` pixels on load.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(cat_files) + len(dog_files), size, size, 3)``
        (images x height x width x RGB). All cat images come first, then all
        dog images, in the order given.
    """
    paths = list(cat_files) + list(dog_files)
    # Size the array from the file lists instead of hard-coding the counts.
    images = np.zeros((len(paths), size, size, 3), dtype='float32')
    for idx, image_path in enumerate(paths):
        img = preprocessing.image.load_img(image_path, target_size=(size, size))
        images[idx] = preprocessing.image.img_to_array(img)
    return images


# Convert each image in the training dataset into an array (cats first, then dogs).
# NOTE: the label cell assumes 4000 images per class here and 1000 per class in
# the test set; the printed shapes below should read 8000 and 2000 rows.
X_train_orig = load_images(train_cats_files, train_dogs_files, d)
print("Shape of training dataset: ", X_train_orig.shape)

# Convert each image in the testing dataset into an array (same layout as above).
X_test_orig = load_images(test_cats_files, test_dogs_files, d)
print("Shape of testing dataset: ", X_test_orig.shape)

Shape of training dataset:  (8000, 32, 32, 3)
Shape of testing dataset:  (2000, 32, 32, 3)


In [125]:
# Working names used by the following cells. These are plain aliases of the
# loaded arrays: no pixel data is copied here.
X_train, X_test = X_train_orig, X_test_orig


In [126]:
# Label convention: cat -> 1, dog -> 0.
# The training images are stored as 4000 cats followed by 4000 dogs, so the
# labels are 4000 ones followed by 4000 zeros.
Y_train_orig = np.concatenate([np.ones(4000), np.zeros(4000)])
Y_train = Y_train_orig.reshape(-1)  # flat label vector of shape (8000,)

print("At position 3 should be a cat:", Y_train[3])
print("At position 4002 should be a dog:", Y_train[4002])

print("shape of Y_train: ", Y_train.shape)


# Same layout for the test set: 1000 cats (label 1) followed by 1000 dogs (label 0).
Y_test = np.concatenate([np.ones(1000), np.zeros(1000)]).reshape(-1)  # shape (2000,)

print("At position 3 should be a cat:", Y_test[3])
print("At position 1002 should be a dog:", Y_test[1002])

print("shape of Y_test: ", Y_test.shape)

At position 3 should be a cat: 1.0
At position 4002 should be a dog: 0.0
shape of Y_train:  (8000,)
At position 3 should be a cat: 1.0
At position 1002 should be a dog: 0.0
shape of Y_test:  (2000,)


In [127]:
# Normalize the data: scale the 8-bit pixel values down by 255.
# Always start from the untouched *_orig arrays so that re-running this cell
# cannot divide the data by 255 a second time.
X_train = X_train_orig / 255.0
X_test = X_test_orig / 255.0

In [131]:
# Convert each class label (cat = 1, dog = 0) to a one-hot vector:
# [0, 1] if cat
# [1, 0] if dog
# Only encode while the labels are still a flat vector. Calling to_categorical
# on labels that are already one-hot would append another axis, so the guards
# keep this cell safe to re-run.
if Y_train.ndim == 1:
    Y_train = np_utils.to_categorical(Y_train, num_classes=2)
if Y_test.ndim == 1:
    Y_test = np_utils.to_categorical(Y_test, num_classes=2)

print(Y_train[3]) # cat
print(Y_train[4002]) # dog

[0. 1.]
[1. 0.]


In [107]:
from sklearn.metrics import confusion_matrix
import itertools

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop,Adam
from keras.callbacks import ReduceLROnPlateau

## Some explanation of what I'm doing at each step below:

model.add(Conv2D(...)) - Adds a convolution layer with the given number of filters. The size of each filter is set by kernel_size. The 'strides' parameter determines how far the filter moves at each step of the convolution: (1, 1) means the filter slides by 1 pixel at a time across the image. padding = 'same' means that, with a stride of (1, 1), the output has the same width and height as the input. This is achieved by adding a border around the edge of each image (in general the thickness of the border depends on the size of the input image, the size of the filter and the stride; with a stride of 1 it depends only on the size of the filter). We set the activation function to 'ReLU' for each convolution layer.

model.add(MaxPool2D(...)) - Adds a pooling layer whose purpose is to downsize the input. pool_size=(2, 2) means that for each 2 x 2 region of the input, we take the maximum value in that region and write it to the corresponding position in the output.

model.add(Dropout(...)) - Applies the Dropout regularization technique, using the provided value as the dropout rate (the fraction of inputs that are randomly dropped during training).

model.add(Flatten()) - Converts the input matrices into a 1-D vector

model.add(Dense(...)) - Creates a fully connected layer with the provided value as the number of neurons in the layer

In [108]:
# CNN for 32x32 RGB inputs: two convolutional stages followed by a dense
# classifier. Each 2x2 max-pool halves the spatial size (32 -> 16 -> 8), and
# 'Same' padding with stride 1 keeps the size unchanged inside each stage.
model = Sequential([
    # Stage 1: two 3x3 convolutions (50 then 75 filters), pool, dropout.
    Conv2D(filters=50, kernel_size=(3, 3), strides=(1, 1), padding='Same',
           activation='relu', input_shape=(32, 32, 3)),
    Conv2D(filters=75, kernel_size=(3, 3), strides=(1, 1), padding='Same',
           activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: one 3x3 convolution (120 filters, default stride), pool, dropout.
    Conv2D(filters=120, kernel_size=(3, 3), padding='Same',
           activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: flatten the feature maps, then two dense layers with dropout.
    Flatten(),
    Dense(500, activation="relu"),
    Dropout(0.4),
    Dense(250, activation="relu"),
    Dropout(0.1),

    # Output: one softmax probability per class, matching the one-hot labels.
    Dense(2, activation='softmax'),
])

In [109]:
# Adam optimizer with its learning rate and moment-decay coefficients spelled out.
optimizer = Adam(
    lr=0.001,      # step size
    beta_1=0.9,    # decay rate for the first-moment estimate
    beta_2=0.999,  # decay rate for the second-moment estimate
)

In [110]:
# Categorical cross-entropy pairs with the 2-way softmax output and the
# one-hot encoded labels; accuracy is reported after every epoch.
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [111]:
batch_size = 125

# Hand batch_size to fit() so training really uses mini-batches of 125 images.
# For in-memory NumPy arrays, fit() works out the number of steps per epoch
# from batch_size itself, so steps_per_epoch is not passed.
# NOTE(review): the test set is used as validation data here, so the reported
# validation accuracy is not an independent estimate if it guides tuning.
history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=10,
                    validation_data=(X_test, Y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


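With some tuning, we achieve an accuracy of 78.4% on the testing dataset. Note that the testing dataset was also passed to `fit` as the validation data, so this figure is likely a somewhat optimistic estimate of performance on unseen images.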