In [None]:
"""
Agenda
Collecting data
Importing libraries and splitting data to train and test
Building the network
Data augmentation
Training
Testing
"""

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Convolution2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense

In [None]:
"""
This is the most important step in creating our network. It consists of four parts:

Convolution
Pooling
Flattening
Full connection
"""

# Build the whole CNN as one declarative layer stack:
# convolution -> max pooling -> flatten -> fully connected head.
classifier = Sequential([
    # Convolution: 32 filters with a 3x3 window moved 3 pixels at a time over
    # 64x64 RGB inputs. The arguments are spelled out as keywords because the
    # positional order in tf.keras is (filters, kernel_size, strides).
    # NOTE(review): a stride equal to the kernel size means the windows never
    # overlap; confirm this is intended and not a leftover of the legacy
    # Keras 1 call style, where `Convolution2D(32, 3, 3)` meant 32 filters of 3x3.
    Convolution2D(filters=32,
                  kernel_size=3,
                  strides=3,
                  input_shape=(64, 64, 3),
                  activation='relu'),
    # Pooling: keep the maximum of every 2x2 neighbourhood.
    MaxPooling2D(pool_size=(2, 2)),
    # Flattening: turn the pooled feature maps into a single vector per image.
    Flatten(),
    # Full connection: one hidden layer, then a single sigmoid unit for the
    # binary decision.
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Compile for binary classification and track accuracy during training.
classifier.compile(optimizer='adam',
                   loss='binary_crossentropy',
                   metrics=['accuracy'])

In [None]:
"""
1. Convolution

The primary purpose of convolution is to extract features from the input image. Convolution preserves the spatial relationship between pixels by learning image features 
using small squares of input data.

First three parameters refer to:

Filters: The dimensionality of the output space (i.e. the number of output filters in the convolution). With 32 filters we end up with 32 convolved feature maps, each 21x21 for our 64x64 input (3x3 kernel, stride of 3, no padding).
Kernel_size: Specifying the height and width of the 2D convolution window. It can be a tuple if we don't want a square.
Strides: Specifying the strides of the convolution along the height and width. It can be a tuple if we want different strides for the height and the width.
"""


"""
Pooling

In the case of Max Pooling, we use the parameter pool_size to define a spatial neighborhood (in our case a 2×2 window) and take the largest element from the rectified 
feature map within that window.
"""


"""
Flattening

We convert the matrix into a 1D array which can be the input of the final Neural Network.
"""


"""
Full Connection

The full connection is connecting our convolutional network to a Neural Network that does final predictions.
"""

In [None]:
"""
Step 4 - Data Augmentation
We need a lot of data to train a network but suppose we have a limited number of images for our network – what do we do? We don’t need to hunt for new images that can be 
added to our dataset. Why? Because neural networks aren’t smart, to begin with.

So, to get more data, we just need to make alterations to our existing dataset – minor changes such as flips, translations, or rotations – and our neural network will think 
these are distinct images anyway. Data augmentation is a way of reducing overfitting of models, where we increase the amount of training data using only the information from 
our training data. The field of data augmentation is not new and, in fact, there are various data augmentation techniques for specific problems.
"""

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Settings shared by both directory iterators: images are resized to 64x64,
# served in batches of 32, and labelled with a single binary value.
flow_options = {
    'target_size': (64, 64),
    'batch_size': 32,
    'class_mode': 'binary',
}

# Training images are multiplied by 1/255 and randomly sheared, zoomed and
# horizontally flipped; this is the augmentation step.
augmentation_options = {
    'rescale': 1./255,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
}
train_datagen = ImageDataGenerator(**augmentation_options)

# Test images only get the same 1/255 rescaling, with no random changes.
test_datagen = ImageDataGenerator(rescale=1./255)

training_set = train_datagen.flow_from_directory('dataset/training_set', **flow_options)

test_set = test_datagen.flow_from_directory('dataset/test_set', **flow_options)

In [None]:
# Step 5 - Training

# `Model.fit` consumes Keras generators directly. The former `fit_generator`
# is deprecated since TensorFlow 2.1 and no longer exists in newer Keras
# releases, so `fit` is used with the same arguments.
# Each of the 10 epochs draws 50 batches from `training_set`; `test_set` is
# passed as validation data. The returned `history` object is used by the
# loss plot.
history = classifier.fit(training_set,
                         steps_per_epoch = 50,
                         epochs = 10,
                         validation_data = test_set)

# Beware of overfitting in case of using too many epochs.

In [None]:
from matplotlib import pyplot as plt

# Plot the per-epoch loss recorded in `history`: the 'loss' series comes from
# the training data and 'val_loss' from the validation data passed to training.
# An explicit figure/axes pair is used, and the axes and title are labelled so
# the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='test')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.set_title('Training vs. test loss')
ax.legend()
plt.show()

"""
We can see that even though the loss of the training set goes down quite fast, it is not the case with the test set. 
That means that our model doesn't generalize very well on new data.
"""

In [None]:
# test

import numpy as np
from tensorflow.keras.preprocessing import image

# Load the image from disk, resized to the 64x64 input size of the network.
test_image = image.load_img('random_dog.png', target_size = (64, 64))
# Convert the image to a numpy array and apply the same 1/255 rescaling that
# the training and test generators use. Without it the network would receive
# pixel values on a different scale from the one it was trained on.
test_image = image.img_to_array(test_image) / 255.0
# Add a leading batch axis: predict works on batches of images.
test_image = np.expand_dims(test_image, axis = 0)
result = classifier.predict(test_image)
# result[0][0] is the single sigmoid output for our one image.
# NOTE(review): the labels below assume `training_set.class_indices` maps the
# cat class to 0 and the dog class to 1; print that dictionary to confirm.
# our cut-off
if result[0][0] >= 0.5:
    prediction = 'dog'
else:
    prediction = 'cat'
print(prediction)