In [None]:
# Import the necessary packages
import os
import zipfile
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import os
import matplotlib.pyplot as plt
import PIL
import seaborn as sns
import plotly
import plotly.graph_objs as go
from sklearn.model_selection import train_test_split                   # sklearn use it for split/shuffle the data
from sklearn.utils import shuffle
from plotly.offline import iplot, init_notebook_mode
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.utils import plot_model
from IPython.display import display
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, LearningRateScheduler

In [None]:
# Show what the input directory contains, then unpack both competition archives
# into the current working directory.
print(os.listdir("../input"))

archive_paths = (
    "../input/dogs-vs-cats-redux-kernels-edition/train.zip",
    "../input/dogs-vs-cats-redux-kernels-edition/test.zip",
)

for archive_path in archive_paths:
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(".")

In [None]:
# Build the training DataFrame: one row per file found in the train directory.
train = os.listdir('/kaggle/working/train')                     # image file names

# The label of each image is the part of its file name before the first "."
label = [file_name.split(".", 1)[0] for file_name in train]

# Two columns: 'Image' (file name) and 'Labels' (class taken from the name)
full_df = pd.DataFrame({'Image': train, 'Labels': label})
full_df

In [None]:
# Show how many images of each class the training data contains.
# The labels are passed as `x=` explicitly: recent seaborn releases accept only
# `data` positionally, so a bare positional list is no longer read as the x variable.
sns.countplot(x=label)

In [None]:
# Split cats and dogs separately (80% train / 20% validation each) so both sets
# keep the same cat:dog ratio as the full data, then merge the per-class pieces.
SPLIT_SEED = 42   # fixed seed: without it every run produces a different split

cat_df = full_df[full_df['Labels'] == 'cat']
dog_df = full_df[full_df['Labels'] == 'dog']

dog_df = shuffle(dog_df, random_state=SPLIT_SEED)
train_dog, valid_dog = train_test_split(dog_df, test_size=0.20, random_state=SPLIT_SEED)

cat_df = shuffle(cat_df, random_state=SPLIT_SEED)
train_cat, valid_cat = train_test_split(cat_df, test_size=0.20, random_state=SPLIT_SEED)

train_full = pd.concat([train_cat, train_dog], axis=0)   # 80% of the cats + 80% of the dogs
valid_full = pd.concat([valid_cat, valid_dog], axis=0)   # 20% of the cats + 20% of the dogs

In [None]:
# Training pipeline: multiply pixel values by 1/255 and augment with random
# shear and zoom.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
)

# Stream batches of 32 RGB images, resized to 64x64, from the files listed in
# train_full; labels come from the 'Labels' column as binary classes.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_full,
    directory='/kaggle/working/train/',
    x_col="Image",
    y_col="Labels",
    target_size=(64, 64),
    color_mode="rgb",
    class_mode="binary",
    batch_size=32,
)

In [None]:
# Validation pipeline: rescale only.
# A separate generator is used on purpose: feeding validation images through
# train_datagen would also apply its random shear/zoom augmentation, so the
# validation metrics would be measured on distorted images.
valid_datagen = ImageDataGenerator(rescale=1./255)

# Same image size, colour mode, label mode and batch size as the training generator.
validation_generator = valid_datagen.flow_from_dataframe(
    valid_full,
    directory='/kaggle/working/train/',
    x_col="Image",
    y_col="Labels",
    target_size=(64, 64),
    color_mode="rgb",
    class_mode="binary",
    batch_size=32)


In [None]:
# Small CNN for 64x64 images with 3 colour channels: two convolution + max-pooling
# stages, then a dense classifier that ends in a single sigmoid unit.
cnn = tf.keras.models.Sequential([
    # Stage 1: 32 3x3 filters; input_shape fixes the expected image size
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[64, 64, 3]),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),

    # Stage 2: same convolution / pooling configuration
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),

    # Flatten the feature maps into a vector for the dense layers
    tf.keras.layers.Flatten(),

    # Fully connected hidden layer with 128 units
    tf.keras.layers.Dense(units=128, activation='relu'),

    # Output layer: one sigmoid unit for binary classification
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

cnn.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
cnn.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Stop training when val_loss has not decreased for 10 consecutive epochs (patience)
earlystopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    verbose=1,
)

# Save the model to disk only when it is the best one seen so far
# (no `monitor` is passed, so the ModelCheckpoint default applies)
checkpointer = ModelCheckpoint(
    filepath="Cats and Dogs Classification.hdf5",
    save_best_only=True,
    verbose=1,
)

In [None]:
class myCallback(tf.keras.callbacks.Callback):
    """Keras callback that stops training once the training loss drops below 0.006."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's training loss and request a stop when it is low enough.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics dict for the epoch; may be None or lack a 'loss' entry.
        """
        # `logs=None` replaces the mutable `{}` default; treat a missing dict as empty.
        loss = (logs or {}).get('loss')

        # Guard against a missing loss: comparing None with a float raises TypeError.
        if loss is not None and loss < 0.006:
            # The condition is on loss, so the message reports loss rather than accuracy.
            print("\nReached loss below 0.006 so cancelling training!")
            self.model.stop_training = True

In [None]:
# Train the CNN on the training generator for up to 25 epochs, using the
# validation generator for validation and the checkpoint / early-stopping callbacks.
history = cnn.fit(
    x=train_generator,
    epochs=25,
    validation_data=validation_generator,
    callbacks=[checkpointer, earlystopping],
)

In [None]:
# Plot the per-epoch curves recorded in `history`: accuracy on the left, loss on the right.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# One figure with two axes; the previous trailing plt.figure() only produced an
# empty second figure and the loss values were never drawn.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

ax_acc.plot(epochs, acc, 'r', label='Training Accuracy')
ax_acc.plot(epochs, val_acc, 'b', label='Validation Accuracy')
ax_acc.set(title='Training and Validation Accuracy', xlabel='Epoch', ylabel='Accuracy')
ax_acc.legend(loc=0)

ax_loss.plot(epochs, loss, 'r', label='Training Loss')
ax_loss.plot(epochs, val_loss, 'b', label='Validation Loss')
ax_loss.set(title='Training and Validation Loss', xlabel='Epoch', ylabel='Loss')
ax_loss.legend(loc=0)

plt.show()

In [None]:
test_images_dir = '/kaggle/working/test/'

# Full path of every file found in the test directory
test = [os.path.join(test_images_dir, file_name) for file_name in os.listdir(test_images_dir)]
num_test_images = len(test)

print('Number of test images in the test dataset : {} \n'.format(len(test)))

# Single-column frame ('Image') holding the test image paths
submission_df = pd.DataFrame({'Image': test})

In [None]:
pip install keras==2.1.2

In [None]:
# Build the submission ids in the same row order as submission_df.
# The ids are later joined to the predictions purely by position, and the predictions
# follow submission_df; listing the directory a second time would rely on os.listdir
# returning the same (arbitrary) order twice.
test = [os.path.basename(image_path) for image_path in submission_df['Image']]   # file names only

# The id of each image is the part of its file name before the first "."
num_id = [file_name.split(".", 1)[0] for file_name in test]

submission_id = pd.DataFrame({'id': num_id})   # single 'id' column
submission_id

In [None]:
# Predict every test image, in submission_df row order.
img_id = []     # id of each predicted image: file name without directory or extension
predict = []    # model output per image, formatted as a string with two decimals

for path in submission_df['Image']:

    # Load through tf.keras (the API the model was built with) and resize to the
    # 64x64 input size declared on the first layer.
    img = tf.keras.preprocessing.image.load_img(path, target_size=(64, 64))

    # Convert to a float array and apply the same 1/255 scaling used by the generators
    img = np.asarray(img, dtype=np.float32) / 255

    # Add the leading batch axis: (1, 64, 64, 3)
    img = img.reshape(-1, 64, 64, 3)

    # One image in, one sigmoid unit out: take the single value as a Python float
    # instead of formatting a 1-element array. verbose=0 silences the per-call progress bar.
    result_predict = float(cnn.predict(img, verbose=0)[0][0])

    predict.append("%.2f" % result_predict)

    # Store the bare id; splitting the full path on "." produced a [path, extension] list
    img_id.append(os.path.splitext(os.path.basename(path))[0])

In [None]:
# Wrap the formatted predictions in a one-column frame named 'label'
submission_label = pd.DataFrame(predict, columns=['label'])

In [None]:
# Put the id and label frames side by side; rows are matched on their index
submission = pd.concat((submission_id, submission_label), axis='columns')

In [None]:
# Display the assembled submission table for a visual check
submission

In [None]:
# Write the 'id' and 'label' columns, in that order, to submission.csv without the index
submission_columns = ['id', 'label']
submission[submission_columns].to_csv('submission.csv', index=False)