In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and print the full path of every file in it.
input_file_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Unzipping the Files

In [None]:
import zipfile as zp

In [None]:
# Extract both competition archives (test set first, then training set) into
# the current working directory.
for archive_name in ("test1.zip", "train.zip"):
    archive_path = "/kaggle/input/dogs-vs-cats/" + archive_name
    with zp.ZipFile(archive_path, "r") as z:
        z.extractall(".")

# Creating Required Directory Structure for ImageDataGenerator

In [None]:
# Create the one-sub-folder-per-class layout (train/ and val/, each with dog/
# and cat/). makedirs(..., exist_ok=True) creates missing parents and does
# not fail when the folder is already there, so the cell can be re-run safely.
for class_dir in ("train/dog", "train/cat", "val/dog", "val/cat"):
    os.makedirs(os.path.join("/kaggle/working", class_dir), exist_ok=True)
print("Directories Created")

# Moving Training Data to the relevant Directories

In [None]:
import shutil
#shutil.rmtree('/kaggle/working/train')
#shutil.rmtree('/kaggle/working/test1')

In [None]:
path = "/kaggle/working/train"
dog_dest = os.path.join(path, "dog")
cat_dest = os.path.join(path, "cat")
files = os.listdir(path)

# (marker, destination) pairs, checked in order: the first marker found in the
# file name decides where the file goes; names matching neither stay put.
routes = (("dog.", dog_dest), ("cat.", cat_dest))
for file_name in files:
    target_dir = next((folder for marker, folder in routes if marker in file_name), None)
    if target_dir is not None:
        shutil.move(os.path.join(path, file_name), os.path.join(target_dir, file_name))
print("Done Moving!")

In [None]:
# Show what is left directly under the training folder after the move.
os.listdir(path)

# Creating a Random Validation Set

In [None]:
import random

In [None]:
# Hold out a fixed-size random sample of dog images as the validation set.
source = "/kaggle/working/train/dog"
dest = '/kaggle/working/val/dog'
no_of_files = 1250
# os.listdir returns entries in arbitrary order, so sort before sampling and
# draw from a seeded RNG: the same files are held out on every fresh run.
files = sorted(os.listdir(source))
# Move only what is still missing, so re-running the cell does not keep
# draining the training set. random.sample still raises ValueError if the
# source folder holds fewer images than requested.
still_needed = max(0, no_of_files - len(os.listdir(dest)))
for file_name in random.Random(42).sample(files, still_needed):
    shutil.move(os.path.join(source, file_name), dest)
# Report the size of the validation folder rather than dumping every file name.
len(os.listdir(dest))

In [None]:
# Hold out a fixed-size random sample of cat images as the validation set.
source = "/kaggle/working/train/cat"
dest = '/kaggle/working/val/cat'
no_of_files = 1250
# os.listdir returns entries in arbitrary order, so sort before sampling and
# draw from a seeded RNG: the same files are held out on every fresh run.
files = sorted(os.listdir(source))
# Move only what is still missing, so re-running the cell does not keep
# draining the training set. random.sample still raises ValueError if the
# source folder holds fewer images than requested.
still_needed = max(0, no_of_files - len(os.listdir(dest)))
for file_name in random.Random(42).sample(files, still_needed):
    shutil.move(os.path.join(source, file_name), dest)
# Report the size of the validation folder rather than dumping every file name.
len(os.listdir(dest))

# Let's figure out the average size of the images so that we can standardize on a common value

In [None]:
from matplotlib.image import imread

In [None]:
# Collect the first two shape dimensions of every training dog image.
dim1 = []  # first axis of each image array
dim2 = []  # second axis of each image array
dog_dir = os.path.join(path, 'dog')
for image_filename in os.listdir(dog_dir):
    img = imread(os.path.join(dog_dir, image_filename))
    # Slice instead of unpacking three values: a grayscale file decodes to a
    # 2-D array, which would make a 3-way unpack raise ValueError.
    d1, d2 = img.shape[:2]
    dim1.append(d1)
    dim2.append(d2)

In [None]:
import seaborn as sns
# Joint distribution of the two image dimensions. x and y must be passed by
# keyword: positional data vectors were deprecated in seaborn 0.11 and raise
# TypeError from 0.12 on. The trailing semicolon hides the JointGrid repr.
sns.jointplot(x=dim1, y=dim2);

# We observe that the image dimensions are centered between 300 and 400 pixels, which is confirmed by the NumPy means below, so we take the image shape as 350 x 350

In [None]:
# Average of each collected dimension list (dim1 first, then dim2).
print(np.mean(dim1),np.mean(dim2))

# We will pass these images to our model as grayscale, so the third dimension (the number of channels) is set to one

In [None]:
# Model input shape: a square image with a single (grayscale) channel.
image_side = 350
image_shape = (image_side, image_side, 1)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Initializing ImageDataGenerator

In [None]:
# Rescaling plus the random augmentations applied to every image this
# generator yields.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
image_gen = ImageDataGenerator(**augmentation_options)

In [None]:
# Throwaway iterator over the training folder with default settings; the
# result is not stored, so this presumably only serves to check what Keras
# finds there (confirm against the cell output).
image_gen.flow_from_directory("/kaggle/working/train")

In [None]:
# Throwaway iterator over the validation folder with default settings; the
# result is not stored, so this presumably only serves to check what Keras
# finds there (confirm against the cell output).
image_gen.flow_from_directory("/kaggle/working/val")

# Model Creation Step

In [None]:
from tensorflow.keras.models import Sequential

In [None]:
from tensorflow.keras.layers import Dense,Conv2D,MaxPool2D,Flatten,Dropout

In [None]:
# Three conv + max-pool stages (32, 32, 64 filters) followed by a small dense
# head with dropout and a single sigmoid output unit.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=image_shape),
    MaxPool2D((2, 2)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# A single sigmoid output pairs with binary cross-entropy.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the network defined above.
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Stop once val_loss has failed to improve for 2 consecutive epochs.
# restore_best_weights rolls the model back to the best epoch when that
# happens; without it the model that gets saved afterwards carries the weights
# of the last (worse) epoch.
early_stop=EarlyStopping(monitor='val_loss',patience=2,restore_best_weights=True)

In [None]:
# Number of images per batch, shared by every directory iterator below.
batch_size=64

# Creating the Generator Objects so that it will pass images from the directory into the model as input. We had set what operations need to be performed on the images while initializing ImageDataGenerator

In [None]:
# Training batches: augmented by image_gen, resized to the model's input size
# and loaded as single-channel images with 0/1 labels.
train_image_gen = image_gen.flow_from_directory(
    "/kaggle/working/train",
    target_size=image_shape[:2],
    color_mode='grayscale',
    batch_size=batch_size,
    class_mode='binary',
)

# Validation images are only rescaled. Passing them through the augmenting
# generator would apply random rotations/shifts/flips on every pass, making
# val_loss (which early stopping monitors) noisy and non-reproducible.
# shuffle=False keeps the batches in file order.
validation_gen = ImageDataGenerator(rescale=1./255)
test_image_gen = validation_gen.flow_from_directory(
    "/kaggle/working/val",
    target_size=image_shape[:2],
    color_mode='grayscale',
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False,
)

In [None]:
# Show which integer label the generator assigned to each class folder
# (presumably cat -> 0, dog -> 1 given the folder names; confirm from the output).
train_image_gen.class_indices

# Fitting The Model

In [None]:
# Train for up to 20 epochs, validating on the held-out set each epoch and
# letting early_stop end training early. Model.fit accepts generators
# directly; fit_generator is deprecated and absent from recent Keras releases.
results = model.fit(train_image_gen, epochs=20, validation_data=test_image_gen, callbacks=[early_stop])

# Saving Model Weights and the Model

In [None]:
# Persist the trained network to the current working directory in two forms:
# the weights alone, and the complete model.
model.save_weights('dogcatcnn_weights.h5')
model.save('dogcatmodel.h5')

# Preparing our test data: we create an additional folder inside the test1 directory so that we can use ImageDataGenerator

In [None]:
# flow_from_directory expects images inside sub-folders, so gather all test
# images under a single test1/test_data folder.
path = "/kaggle/working/test1"
dest = path + "/test_data"
# exist_ok makes the cell safe to re-run after the folder has been created.
os.makedirs(dest, exist_ok=True)
files = os.listdir(path)

for file_name in files:
    source_path = os.path.join(path, file_name)
    # Move regular files only. This skips the test_data folder itself without
    # relying on a substring match that would also skip any image whose name
    # happens to contain "test".
    if os.path.isfile(source_path):
        shutil.move(source_path, os.path.join(dest, file_name))
print("Done Moving!")
print(len(os.listdir(dest)))

# Here we only use rescaling as all other parameters are not of use for us as we are now only predicting using the trained model

In [None]:
# Inference-time pipeline: pixel rescaling only, no augmentation.
testing_gen = ImageDataGenerator(rescale=1./255)

# Same size / colour settings as the training iterators; shuffle=False keeps
# the batches in the iterator's file order.
realtest_flow_options = dict(
    target_size=image_shape[:2],
    color_mode='grayscale',
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False,
)
realtest_image_gen = testing_gen.flow_from_directory("/kaggle/working/test1", **realtest_flow_options)

# Predicting the images; the values returned are between 0 and 1 because the last layer uses a sigmoid activation

In [None]:
# Score every test image. Model.predict accepts the generator directly;
# predict_generator is deprecated and absent from recent Keras releases.
predictions = model.predict(realtest_image_gen, verbose=1)

# Converting our predictions to either 1 or 0 based on the threshold of 0.5

In [None]:
# Scores strictly above 0.5 become label 1, everything else label 0.
above_threshold = np.greater(predictions, 0.5)
preds = above_threshold.astype(int)

In [None]:
# Display the thresholded 0/1 predictions.
preds

# Saving our Final Results in a DataFrame and Exporting it to CSV file. We use an id as index and save the relevant predictions in the label column

In [None]:
# flow_from_directory orders files by sorted *string* name ("1.jpg", "10.jpg",
# "100.jpg", ...), so the prediction rows are NOT in numeric id order and a
# plain 1..N range would pair labels with the wrong images. Take each id from
# the file name the generator actually used, so row i of the predictions is
# matched with its own image.
# Assumes every test file is named "<integer id>.<ext>" - int() raises
# ValueError otherwise.
img_id = [
    int(os.path.splitext(os.path.basename(file_name))[0])
    for file_name in realtest_image_gen.filenames
]

In [None]:
# One row per prediction. ravel() flattens the prediction column without
# hard-coding the number of test images; sorting by id keeps the submission
# in ascending id order whatever order the ids arrive in.
final_results = (
    pd.DataFrame({"id": img_id, "label": preds.ravel()})
    .sort_values("id")
    .reset_index(drop=True)
)

In [None]:
# Write the submission file to the current working directory; index=False
# leaves the DataFrame's row index out of the CSV.
final_results.to_csv("Submission.csv",index=False)