# Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import os
import zipfile
import random
import tensorflow as tf

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from shutil import copyfile

# Unzipping Training Images

In [None]:
# Extract the training archive into /tmp/source.
# The `with` block closes the zip handle even if extraction raises part-way.
local_zip = '../input/dogs-vs-cats/train.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp/source')
print('No. of Training Images: ', len(os.listdir('/tmp/source/train')))

In [None]:
# Show the first ten entries that os.listdir returns for the training folder.
os.listdir('/tmp/source/train')[:10]

# Visualizing Random Images

In [None]:
# Draw eight randomly picked training images on a 4x4 subplot grid
# (only the first eight cells of the grid are filled).
plt.figure(figsize=(16, 16))

images = os.listdir('/tmp/source/train')
for slot in range(1, 9):
    chosen = random.choice(images)
    ax = plt.subplot(4, 4, slot)
    ax.axis('off')
    pixels = mpimg.imread(os.path.join('/tmp/source/train', chosen))
    ax.imshow(pixels)

# Unzipping Testing Images

In [None]:
# Extract the test archive into /tmp/source.
# The `with` block closes the zip handle even if extraction raises part-way.
local_zip = '../input/dogs-vs-cats/test1.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp/source')
print('No. of Testing Images: ',len(os.listdir('/tmp/source/test1')))

In [None]:
# Show the first ten entries that os.listdir returns for the test folder.
os.listdir('/tmp/source/test1')[:10]

# Directories

 * cats-v-dogs:
    * training:
        * cats
        * dogs
    * validation:
        * cats
        * dogs

In [None]:
# Create the training/validation folder tree with one sub-folder per class.
#
# os.makedirs(..., exist_ok=True) makes this cell safe to re-run: folders that
# already exist are left alone and any missing ones are still created, whereas
# a plain os.mkdir on an existing parent would abort before the sub-folders
# were made. The loop variables no longer shadow the builtin `dir`.
parent_dir = '/tmp/cats-v-dogs'

try:
    for split in ['training', 'validation']:
        for label in ['cats', 'dogs']:
            os.makedirs(os.path.join(parent_dir, split, label), exist_ok=True)
except OSError as err:
    # Still best-effort, but report the cause instead of a bare 'error'.
    print('error', err)

In [None]:
# Copy every source image into the training or validation folder of its class.
# A random 10% of each class goes to validation; the rest goes to training.
src_dir = '/tmp/source/train'

cats_train_dir = '/tmp/cats-v-dogs/training/cats/'
cats_valid_dir = '/tmp/cats-v-dogs/validation/cats/'

dogs_train_dir = '/tmp/cats-v-dogs/training/dogs/'
dogs_valid_dir = '/tmp/cats-v-dogs/validation/dogs/'

# List the source folder once so every step below works on the same snapshot.
all_images = os.listdir(src_dir)

# File names of the cat images (names starting with 'cat')
cats = [img for img in all_images if img[:3] == 'cat']

# File names of the dog images (names starting with 'dog')
dogs = [img for img in all_images if img[:3] == 'dog']

# Take 10% of each class for validation
cats_valid_sample = random.sample(cats, int(0.1*len(cats)))
dogs_valid_sample = random.sample(dogs, int(0.1*len(dogs)))

# Iterate the per-class lists rather than the raw listing: a file that is
# neither 'cat*' nor 'dog*' is skipped instead of being copied to whatever
# destination the previous iteration left behind. The validation samples are
# turned into sets so each membership test is O(1) instead of a list scan.
for names, valid_names, train_dest, valid_dest in (
    (cats, set(cats_valid_sample), cats_train_dir, cats_valid_dir),
    (dogs, set(dogs_valid_sample), dogs_train_dir, dogs_valid_dir),
):
    for img in names:
        dest_dir = valid_dest if img in valid_names else train_dest
        copyfile(os.path.join(src_dir, img), os.path.join(dest_dir, img))

In [None]:
# Report, for each class, the total number of source images and how many
# files ended up in the training and validation folders.
def _count_files(folder):
    """Return the number of entries os.listdir reports for `folder`."""
    return len(os.listdir(folder))


print('No. of ALL cats : ', len(cats))
print('No. of cats for training: ', _count_files(cats_train_dir))
print('No. of cats for validation: ', _count_files(cats_valid_dir))

print('\nNo. of ALL dogs : ', len(dogs))
print('No. of dogs for training: ', _count_files(dogs_train_dir))
print('No. of dogs for validation: ', _count_files(dogs_valid_dir))

# Visualizing Random images of cats for training

In [None]:
# Draw eight randomly picked cat images from the training folder on a 4x4
# subplot grid (only the first eight cells of the grid are filled).
plt.figure(figsize=(16, 16))

images = os.listdir(cats_train_dir)
for slot in range(1, 9):
    chosen = random.choice(images)
    ax = plt.subplot(4, 4, slot)
    ax.axis('off')
    pixels = mpimg.imread(os.path.join(cats_train_dir, chosen))
    ax.imshow(pixels)

# Visualizing Random images of dogs for validation

In [None]:
# Draw eight randomly picked dog images from the validation folder on a 4x4
# subplot grid (only the first eight cells of the grid are filled).
plt.figure(figsize=(16, 16))

images = os.listdir(dogs_valid_dir)
for slot in range(1, 9):
    chosen = random.choice(images)
    ax = plt.subplot(4, 4, slot)
    ax.axis('off')
    pixels = mpimg.imread(os.path.join(dogs_valid_dir, chosen))
    ax.imshow(pixels)

# Images Augmentation

In [None]:
# Training images are multiplied by 1/255 and randomly augmented
# (rotation, shifts, shear, zoom, horizontal flip).
# Validation images are only multiplied by 1/255.
# Both generators resize to 150x150, use batches of 28 and binary labels.
augmentation = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1/255, **augmentation)
valid_datagen = ImageDataGenerator(rescale=1/255)

flow_options = dict(target_size=(150, 150), batch_size=28, class_mode='binary')

train_dir = '/tmp/cats-v-dogs/training/'
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

valid_dir = '/tmp/cats-v-dogs/validation/'
valid_generator = valid_datagen.flow_from_directory(valid_dir, **flow_options)

# Our CNN

In [None]:
# Callback that ends training early once the monitored quantity stops improving.
early_stopping = EarlyStopping(
    min_delta=0.01,  # minimum amount of change to count as an improvement
    patience=5,  # how many epochs to wait before stopping
    restore_best_weights=True,
)

strategy = tf.distribute.MirroredStrategy()

# Build and compile the network inside the distribution strategy's scope.
with strategy.scope():
    cnn_layers = [
        # Three convolution + max-pooling stages on 150x150 RGB input
        Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        # Dense head ending in a single sigmoid unit
        Flatten(),
        Dense(128, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(cnn_layers)

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

model.summary()

In [None]:
# Train for at most 20 epochs on train_generator, validating on
# valid_generator; the early_stopping callback may end training sooner.
history = model.fit(train_generator, epochs=20, verbose=1,validation_data=valid_generator, callbacks = [early_stopping])

In [None]:
# Print the validation generator's class_indices mapping.
print(valid_generator.class_indices)

In [None]:
# Plot the recorded training curves: losses in one chart, accuracies in another.
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot()
history_df[['accuracy', 'val_accuracy']].plot()

In [None]:
# Evaluate the trained model on the validation generator.
model.evaluate(valid_generator)

# Predicting Testing Images


In [None]:
# Folder holding the test images; show the first ten entries os.listdir returns.
test_dir = '/tmp/source/test1'
os.listdir(test_dir)[:10]

In [None]:
def convert_one_img(img_folder, img, target_size=(150, 150)):
    """Load one image file and return it as a batch of one scaled array.

    Args:
        img_folder: Directory that contains the image file.
        img: File name of the image inside ``img_folder``.
        target_size: Size forwarded to ``image.load_img`` for resizing.
            Keras documents this tuple as ``(height, width)``. Defaults to
            ``(150, 150)``, the value that was previously hard-coded.

    Returns:
        The array produced by ``image.img_to_array`` with a leading axis of
        length 1 added and every value divided by 255.
    """
    img_path = os.path.join(img_folder, img)
    pixels = image.load_img(img_path, target_size=target_size)
    pixels = image.img_to_array(pixels)
    # Add the leading axis so the result can be passed straight to predict().
    pixels = np.expand_dims(pixels, axis=0)
    pixels /= 255

    return pixels

In [None]:
def convert_imgs_to_tensors(img_folder):
    """Convert every file in ``img_folder`` and stack the results.

    Each entry returned by ``os.listdir(img_folder)`` is passed through
    ``convert_one_img``; the per-image arrays are then combined with
    ``np.vstack`` in listing order.

    Args:
        img_folder: Directory whose files are all converted.

    Returns:
        The stacked array of all converted images.
    """
    converted = [
        convert_one_img(img_folder, file_name)
        for file_name in os.listdir(img_folder)
    ]
    return np.vstack(converted)

In [None]:
# Sanity check: convert the first listed test image and show the model's raw output.
img1= convert_one_img(test_dir, os.listdir(test_dir)[0])
model.predict(img1)

# Try Prediction on Training Data (Cats)

In [None]:
# Show the first 20 listed training cat images on a 5x5 grid, each titled
# with the model's predicted label (output below 0.5 -> 'Cat', else 'Dog').
plt.figure(figsize=(16,16))

images = os.listdir(cats_train_dir)[:20]
for i, img_name in enumerate(images):
    ax = plt.subplot(5, 5, i + 1)
    ax.axis('off')
    ax.imshow(mpimg.imread(os.path.join(cats_train_dir, img_name)))
    # Predict on the very file being displayed. Re-listing the directory and
    # indexing it by position on every iteration was wasteful and relied on
    # two separate listings returning the same order.
    pic = convert_one_img(cats_train_dir, img_name)
    label = 'Cat' if model.predict(pic)[0][0] < 0.5 else 'Dog'
    ax.set_title(label)

# Try Prediction on Validation Data (Dogs)

In [None]:
# Show the first 18 listed validation dog images on a 5x5 grid, each titled
# with the model's predicted label (output below 0.5 -> 'Cat', else 'Dog').
plt.figure(figsize=(16,16))

images = os.listdir(dogs_valid_dir)[:18]
for i, img_name in enumerate(images):
    ax = plt.subplot(5, 5, i + 1)
    ax.axis('off')
    ax.imshow(mpimg.imread(os.path.join(dogs_valid_dir, img_name)))
    # Predict on the very file being displayed. Re-listing the directory and
    # indexing it by position on every iteration was wasteful and relied on
    # two separate listings returning the same order.
    pic = convert_one_img(dogs_valid_dir, img_name)
    label = 'Cat' if model.predict(pic)[0][0] < 0.5 else 'Dog'
    ax.set_title(label)

In [None]:
# Convert every file in the test folder into one stacked array for prediction.
test_dir = '/tmp/source/test1'
test_images= convert_imgs_to_tensors(test_dir)

# Check Predictions on First 100 Images of Testing Data

In [None]:
# Show the first 100 listed test images on a 10x10 grid, each titled with
# the model's predicted label (output below 0.5 -> 'Cat', else 'Dog').
plt.figure(figsize=(20,20))

images = os.listdir(test_dir)[:100]
for i, img_name in enumerate(images):
    ax = plt.subplot(10, 10, i + 1)
    ax.axis('off')
    ax.imshow(mpimg.imread(os.path.join(test_dir, img_name)))
    # Predict on the very file being displayed. Re-listing the directory and
    # indexing it by position on every iteration was wasteful and relied on
    # two separate listings returning the same order.
    pic = convert_one_img(test_dir, img_name)
    label = 'Cat' if model.predict(pic)[0][0] < 0.5 else 'Dog'
    ax.set_title(label)

> Only 5 of the first 100 testing images are predicted incorrectly.

In [None]:
# Threshold the model outputs at 0.5 and cast the resulting booleans to integers.
pred = (model.predict(test_images) > 0.5).astype('int')

In [None]:
# Build a DataFrame with one row per file name in the test folder.
# NOTE(review): `pred` was computed from a separate os.listdir call inside
# convert_imgs_to_tensors; pairing it with this listing assumes both calls
# return the files in the same order - confirm.
test_dir = '/tmp/source/test1'
test_data = os.listdir(test_dir)
test_df = pd.DataFrame({ "id" : test_data})
test_df

In [None]:
# Keep only the part of each file name before the first '.' as the id.
test_df['id'] = test_df['id'].str.split('.').str[0]
# Attach the thresholded predictions as the label column.
# NOTE(review): presumably `pred` has one row per test image, in the same
# order as the ids above - confirm.
test_df['label'] = pred
test_df.head(20)

In [None]:
# Write the id/label table to submission.csv without the DataFrame index.
test_df.to_csv('submission.csv',index=False)