In [2]:
# Part 1 - Building the CNN

# Importing the Keras libraries and packages
import argparse
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.preprocessing import image
import numpy as np

Using TensorFlow backend.


In [2]:

model = Sequential()

Convolution and Pooling are both matrix operations using arithmetic on the image pixel values which we convert into matrices formats. 

**Convolution** is done for feature extraction by using a filter matrix (n x n) which is basically a smaller matrix (n < m) than the image matrix (m x m). We do multiplication and addition by 'striding' the filter matrix over the image matrix (covers n x n of input matrix at a time) and get a single value for each stride. Effectively the new matrix is a much smaller one.

**Pooling** is done to reduce the dimensionality but also make sure no information is lost. Similar to the filter, a pooling matirx strides over input matrix and selects the value that satisfies it's pooling condition. For eg: we are using max pooling. Thus for every stride, it chooses the max value from the current n x n matrix in it's stride.

Rectified Linear Unit is an activation function takes the output value of X and compares it to find the max(0,X). If X > 0 then it returns X, else of X < 0 it returns 0. 

In [None]:
# Convolution operation using a filter (no of filter,(shape of filter)),(res x res, 3 stands for RGB)
model.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), activation = 'relu'))

# Pooling -> to reduce matrix by selecting max image
model.add(MaxPooling2D(pool_size = (2, 2)))

model.add(Conv2D(32, (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))


# Flattening -> converts 2D arrays into vectors
model.add(Flatten())

# Dense layer for fully connected layer
model.add(Dense(units = 128, activation = 'relu'))
model.add(Dense(units = 1, activation = 'sigmoid'))

The optimizer makes sure we reach the local or global minima to ensure lowest value of the cost function. THe loss function calculates the difference between predicted and target values.

In [None]:
# Compiling the CNN
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

We use an image generator for the purpose of **Image Augmentation**. This is the process of taking images that are in our training dataset and manipulating them to create many altered versions of the same image. This both provides more images to train on, but can also help expose the classifier to a wider range of lighting, coloring situations as well as aspect ratio change or angle change so as to make the classifier more robust.

In [None]:
from keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rescale = 1./255, shear_range = 0.2, zoom_range = 0.2,horizontal_flip = True)

In [None]:
training_set = datagen.flow_from_directory('dataset/training_set', target_size = (64, 64), 
                                           batch_size = 32, class_mode = 'binary')

In [None]:
test_set = datagen.flow_from_directory('dataset/test_set', target_size = (64, 64), batch_size = 32, class_mode = 'binary')

Batch size is the number of data samples the network works on at once. As far as I understand, this does not improve accuracy but speed instead. Default value is 32 but you can change that acc to your PC specs I guess.

In [None]:
model.fit_generator(training_set, steps_per_epoch = 4000, epochs = 10, validation_data = test_set, validation_steps = 1000)

An epoch is one whole iteration over the training dataset i.e. the network has seen all the training data once after one epoch and so forth. Steps per epoch is the number of steps (how many batches of data samples) to fetch from generator before declaring one epoch finished and starting the next epoch. This number of steps should typically be equal to the number of samples (8000) of your dataset divided by the batch size (32) but for some reason 4000 worked nice for me by trial and error. Go figure  

In [None]:
test_image = image.load_img('dataset/moonmoon.jpg', target_size = (64, 64))
test_image = image.img_to_array(test_image)
test_image = np.expand_dims(test_image, axis = 0)

In [None]:
result = model.predict(test_image)
print(result)
training_set.class_indices
if result[0][0] == 1:
    prediction = 'dog'
    print(prediction)
else:
    prediction = 'cat'
    print(prediction)

In [4]:
from keras.models import model_from_json

Below code is for saving your model once you achieve your desired accuracy. Sequently it can be loaded as shown in the cells further down

In [None]:
# save model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")

In [5]:
# load json and create model
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")

Loaded model from disk


In [None]:
# evaluate loaded model on test data
loaded_model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
score = loaded_model.evaluate(X, Y, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))

In [10]:
test_image = image.load_img('dataset/catssuck.jpg', target_size = (64, 64))
test_image = image.img_to_array(test_image)
test_image = np.expand_dims(test_image, axis = 0)

result = loaded_model.predict(test_image)
print(result)
# training_set.class_indices
# if result[0][0] == 1:
#     prediction = 'dog'
#     print(prediction)
# else:
#     prediction = 'cat'
#     print(prediction)

[[ 0.]]
