# Written by Abiola Obembe
## The Asirra (Dogs VS Cats) dataset
### Date 2020-09-19
#### Summary: The Asirra (animal species image recognition for restricting access) dataset was introduced in 2013 for a machine learning competition. The dataset includes 25,000 images with equal numbers of labels for cats and dogs.

### Install dependencies

In [51]:
import tensorflow as tf
import pandas as pd
import numpy as np
print("The version of Tensorflow is:", tf.__version__)

The version of Tensorflow is: 2.2.0


In [53]:
import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import random
import os

print("Libraries installed succesfully!")


Libraries installed succesfully!


### Load  Dataset: Training and Cross-validation sets

In [54]:
filenames = os.listdir("./dataset/train/train")
categories = []

for name in filenames:
    category = name.split('.')[0]
    if category == 'dog' :
        categories.append(1)
    else:
        categories.append(0)
        

df = pd.DataFrame({'filename': filenames, 'category': categories})  

df.head(10)

Unnamed: 0,filename,category
0,cat.0.jpg,0
1,cat.1.jpg,0
2,cat.10.jpg,0
3,cat.100.jpg,0
4,cat.1000.jpg,0
5,cat.10000.jpg,0
6,cat.10001.jpg,0
7,cat.10002.jpg,0
8,cat.10003.jpg,0
9,cat.10004.jpg,0


In [55]:
# Manage dataframe
df['category'] = df['category'].replace({0:'cat', 1:'dog'})
df.head()

Unnamed: 0,filename,category
0,cat.0.jpg,cat
1,cat.1.jpg,cat
2,cat.10.jpg,cat
3,cat.100.jpg,cat
4,cat.1000.jpg,cat


In [56]:
# Split dataframe to training set and cross-validation set
train_df, validate_df = train_test_split(df, test_size = 0.15, random_state = 1)
train_df = train_df.reset_index(drop = True)
validate_df = validate_df.reset_index(drop = True)
batch_size = 2**5
print("The total images in training set is:" ,str(train_df.shape[0]))
print("The total images in cross-validation set is:" ,str(validate_df.shape[0]))
print("The batch size for mini-batch size is", str(batch_size))

The total images in training set is: 21250
The total images in cross-validation set is: 3750
The batch size for mini-batch size is 32


In [57]:
# print training set dataframe
train_df.head()

Unnamed: 0,filename,category
0,cat.10671.jpg,cat
1,cat.12224.jpg,cat
2,dog.7467.jpg,dog
3,dog.12209.jpg,dog
4,dog.8652.jpg,dog


In [29]:
# print cross-validation set dataframe
validate_df.head()

Unnamed: 0,filename,category
0,dog.6841.jpg,dog
1,cat.7288.jpg,cat
2,dog.2738.jpg,dog
3,dog.10090.jpg,dog
4,cat.6148.jpg,cat


### Load  Dataset: Test set

In [58]:
filenames_test = os.listdir("./dataset/test1/test1")
categories = []

        

test_df = pd.DataFrame({'filename_test': filenames_test})  

test_df.head(10)

Unnamed: 0,filename_test
0,1.jpg
1,10.jpg
2,100.jpg
3,1000.jpg
4,10000.jpg
5,10001.jpg
6,10002.jpg
7,10003.jpg
8,10004.jpg
9,10005.jpg


### Image Preprocessing

In [59]:
# Import libraries for image processing
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import random
print('Installation complete and successful!')


Installation complete and successful!


In [60]:
# Define Image properties
Image_width = 128
Image_height = 128
Image_size = (Image_width, Image_height)
Image_channels = 3

In [61]:
# Training set image generator: Image Augmentation
train_datagen = ImageDataGenerator(
    rescale = 1./255,
    rotation_range=20,
    shear_range = 0.2,
    zoom_range = 0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

train_gen = train_datagen.flow_from_dataframe(
    train_df,
    directory="./dataset/train/train",
    x_col="filename",
    y_col="category",
    target_size=Image_size,
    class_mode="categorical",
    batch_size=batch_size,
    shuffle=True,
    seed=1)

Found 21250 validated image filenames belonging to 2 classes.


In [62]:
# Cross- validation set image generator

validate_datagen = ImageDataGenerator(rescale = 1./255)

validate_gen =validate_datagen.flow_from_dataframe(
    validate_df,
    directory="./dataset/train/train",
    x_col="filename",
    y_col="category",
    target_size=Image_size,
    class_mode="categorical",
    batch_size=batch_size,
    shuffle=True,
    seed=1)

Found 3750 validated image filenames belonging to 2 classes.


### Preprocess the test set

In [35]:
# Create test set generator
test_datagen = ImageDataGenerator(rescale = 1./255)

test_gen = train_datagen.flow_from_dataframe(
    train_df,
    directory="./dataset/test1/test1",
    x_col="filename",
    y_col="category",
    target_size=Image_size,
    class_mode="categorical",
    batch_size=batch_size,
    shuffle=True,
    seed=1)

Found 0 validated image filenames belonging to 0 classes.


  .format(n_invalid, x_col)


### Build the ConvNet: Input -> Conv -> Pool ->  Conv -> Pool -> Flatten -> Fully Connected -> Fully Connected -> yhat

In [36]:
# Import required utilities from keras library
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization

print("Utilities installed successfully!")

Utilities installed successfully!


In [37]:
# Initailize the CNN
cnn = Sequential()

In [38]:
# Step 1
#cnn.add(Conv2D(filters = 32, kernel_size = 3, activation = tf.keras.layers.LeakyReLU(alpha=0.1), input_shape = [Image_width, Image_height, Image_channels] ))
cnn.add(Conv2D(filters = 32, kernel_size = 3, activation = 'relu',input_shape = [Image_width, Image_height, Image_channels]))
#cnn.add(BatchNormalization())


In [39]:
# Step 2: Maxpooling
cnn.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='valid'))
#cnn.add(Dropout(0.2))

In [40]:
# Step 3: Convolution 
#cnn.add(Conv2D(filters = 32, kernel_size = 3, activation = tf.keras.layers.LeakyReLU(alpha=0.1) ))
cnn.add(Conv2D(filters = 32, kernel_size = 3, activation = 'relu' ))
# cnn.add(BatchNormalization())

In [41]:
# Step 4: MaxPooling
cnn.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='valid'))
#cnn.add(Dropout(0.2))

In [42]:
# Step 5: Flatten
cnn.add(Flatten())

In [44]:
# Step 6: Full Connection
#cnn.add(Dense(units = 128,activation = tf.keras.layers.LeakyReLU(alpha=0.1)))
cnn.add(Dense(units = 128,activation = 'relu') )
#cnn.add(BatchNormalization())
#cnn.add(Dropout(0.2))

In [None]:
# Step 7: Full Connection
#cnn.add(Dense(units = 64,activation = tf.keras.layers.LeakyReLU(alpha=0.1)))
#cnn.add(Dense(units = 128,activation = 'relu') 
#cnn.add(BatchNormalization())
#cnn.add(Dropout(0.2))

In [45]:
# Step 8: Output Layer
cnn.add(Dense(units = 1, activation = 'sigmoid'))

### Compile CNN

In [46]:
cnn.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# rmsprop, sgd

### Model Summry

In [47]:
cnn.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 126, 126, 32)      896       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 125, 125, 32)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 123, 123, 32)      9248      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 122, 122, 32)      0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 476288)            0         
_________________________________________________________________
dense (Dense)                (None, 128)               60964992  
_________________________________________________________________
dense_1 (Dense)              (None, 1)                

### Train the ConVnet

In [49]:
# Define callbacks and learning rate
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
earlystop = EarlyStopping(patience = 10)
learning_rate_reduction = ReduceLROnPlateau( monitor = 'val_accuracy', patience = 2, verbose = 1, factor = 0.25, min_lr = 0.00001)
callbacks = [earlystop, learning_rate_reduction]

In [50]:
# train the model
m_train = train_df.shape[0]
m_validate = validate_df.shape[0]
history = cnn.fit(train_gen, epochs = 10, validation_data = validate_gen, 
                             validation_steps =  m_validate// batch_size, 
                             steps_per_epoch= m_train// batch_size, callbacks = callbacks)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 00003: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 4/10

KeyboardInterrupt: 

### Save the Model

In [None]:
# Save the model
cnn.save('Model1_Dogs_Vs_Cat_10epochs_RelU.h5')

### Make Prediction from Test set

In [None]:
# Make prediction
m_test = test_df.shape[0]
predict = cnn.predict_generator(test_gen, steps = m_test//batch_size)
print(predict)