In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input mount and echo the full path of every file found.
for dirname, _subdirs, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        file_path = os.path.join(dirname, filename)
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing Libraries

In [None]:
import os # building and listing file paths
import zipfile # extracting the zipped dataset
import tensorflow as tf # model definition
import random # shuffling the images before the train/validation split
from tensorflow.keras.preprocessing.image import ImageDataGenerator # reads images from class sub-folders and labels them by folder
from tensorflow.keras.optimizers import RMSprop # optimizer used when compiling the model
from shutil import copyfile # copying files from one folder to another


## Extracting the files from the zipped folder

In [None]:
# Unpack both competition archives into the writable working directory.
local_zip1 = '/kaggle/input/dogs-vs-cats/'
dirs = ["train.zip", "test1.zip"]
for directory in dirs:
    local_zip = os.path.join(local_zip1, directory)
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(local_zip, 'r') as zip_ref:
        zip_ref.extractall('/kaggle/working/dogs-vs-cats/')

## Sanity Check for the images in the directories

In [None]:
# Sanity check: print one sample file path for every folder found under
# the extracted train and test1 directories.
for directory in ("train", "test1"):
    path = os.path.join("/kaggle/working/dogs-vs-cats/", directory)
    for dirname, _, filenames in os.walk(path):
        # Slicing to a single element shows only the first file of the folder.
        for filename in filenames[:1]:
            print(os.path.join(dirname, filename))

### Counting the files in each directory

In [None]:
# Print the number of entries in the extracted train and test1 folders.
for extracted_dir in ('/kaggle/working/dogs-vs-cats/train',
                      '/kaggle/working/dogs-vs-cats/test1'):
    print(len(os.listdir(extracted_dir)))

### Creating new directories to make labeling easier

In [None]:
# Create one sub-folder per class inside the extracted train directory.
# exist_ok=True keeps the cell safe to re-run and, unlike a single try/except
# around both calls, still creates "dog" when "cat" already exists.
# Other failures (e.g. permission errors) are raised instead of being ignored.
os.makedirs('/kaggle/working/dogs-vs-cats/train/cat', exist_ok=True)
os.makedirs('/kaggle/working/dogs-vs-cats/train/dog', exist_ok=True)

### These are the directories

In [None]:
# Per-class folders (inside the extracted train directory) that hold the
# labelled copies of the training images.
DOGDIR = '/kaggle/working/dogs-vs-cats/train/dog'
CATDIR = '/kaggle/working/dogs-vs-cats/train/cat'

### Copy the images from the original dataset into the cat and dog directories

In [None]:
# Copy every extracted training image into the folder of its class.
# The source path is built from the directory being listed and the current
# entry, so the loop does not depend on variables left over from other cells.
TRAIN_SOURCE_DIR = "/kaggle/working/dogs-vs-cats/train"
for file in os.listdir(TRAIN_SOURCE_DIR):
    source_path = os.path.join(TRAIN_SOURCE_DIR, file)
    # CATDIR and DOGDIR live inside this directory and their names match the
    # checks below, so anything that is not a regular file is skipped.
    if not os.path.isfile(source_path):
        continue
    if "cat" in file:
        copyfile(source_path, os.path.join(CATDIR, file))
    elif "dog" in file:
        copyfile(source_path, os.path.join(DOGDIR, file))

### Sanity check for lengths

In [None]:
# Print how many entries each class folder holds (cats first, then dogs).
for class_dir in (CATDIR, DOGDIR):
    print(len(os.listdir(class_dir)))

### Creating the training and validation directories, each with its own cats and dogs sub-directories

In [None]:
# Folder tree for the train/validation split: a root, one folder per split,
# and a cats/ and dogs/ folder inside each split.
# Parents are listed before their children, so the entries can be created
# one by one in this order.
DIRS = ['/kaggle/working/cats-v-dogs',
        '/kaggle/working/cats-v-dogs/training',
        '/kaggle/working/cats-v-dogs/validation',
        '/kaggle/working/cats-v-dogs/training/cats',
        '/kaggle/working/cats-v-dogs/training/dogs',
        '/kaggle/working/cats-v-dogs/validation/cats',
        '/kaggle/working/cats-v-dogs/validation/dogs']

In [None]:
# Create every folder in DIRS. exist_ok=True keeps the cell re-runnable while
# letting genuine failures (e.g. permission errors) surface instead of being
# silently ignored.
for directory in DIRS:
    os.makedirs(directory, exist_ok=True)

### Splitting the data

In [None]:
# Split part of the training data off into a validation set
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly split the files of SOURCE between TRAINING and TESTING.

    Files are copied (not moved), so SOURCE is left untouched.

    Args:
        SOURCE: directory containing the files to split.
        TRAINING: existing directory that receives the first share.
        TESTING: existing directory that receives the remaining files.
        SPLIT_SIZE: fraction of the usable files copied to TRAINING; the
            resulting count is truncated to an integer.

    Entries that are not regular files, and files of size zero, are skipped.
    Directory arguments work with or without a trailing path separator.
    """
    # Sort the listing so the outcome depends only on the state of the
    # `random` module, not on the order the file system returns entries in.
    files = sorted(
        file_name
        for file_name in os.listdir(SOURCE)
        if os.path.isfile(os.path.join(SOURCE, file_name))
        and os.path.getsize(os.path.join(SOURCE, file_name)) > 0
    )

    split_point = int(len(files) * SPLIT_SIZE)

    # random.sample over the full length returns a shuffled copy.
    shuffled = random.sample(files, len(files))

    for file_name in shuffled[:split_point]:
        copyfile(os.path.join(SOURCE, file_name), os.path.join(TRAINING, file_name))

    for file_name in shuffled[split_point:]:
        copyfile(os.path.join(SOURCE, file_name), os.path.join(TESTING, file_name))

In [None]:
# Source folders (one per class) and the matching training / validation targets.
CAT_SOURCE_DIR = "/kaggle/working/dogs-vs-cats/train/cat/"
TRAINING_CATS_DIR = "/kaggle/working/cats-v-dogs/training/cats/"
TESTING_CATS_DIR = "/kaggle/working/cats-v-dogs/validation/cats/"
DOG_SOURCE_DIR = "/kaggle/working/dogs-vs-cats/train/dog/"
TRAINING_DOGS_DIR = "/kaggle/working/cats-v-dogs/training/dogs/"
TESTING_DOGS_DIR = "/kaggle/working/cats-v-dogs/validation/dogs/"

# Fraction of each class copied to the training folders; the rest is validation.
split_size = .9

# Seed the RNG so that re-running this cell draws the same partition
# (assuming the directory listing order is unchanged) instead of copying a
# different random subset on top of the previous one, which would leave
# files shared between the training and validation folders.
random.seed(42)
split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)

### Sanity check for the number of files in the training and validation sets

In [None]:
# Print the entry count of each split folder: training cats, training dogs,
# validation cats, validation dogs.
for split_dir in ('/kaggle/working/cats-v-dogs/training/cats/',
                  '/kaggle/working/cats-v-dogs/training/dogs/',
                  '/kaggle/working/cats-v-dogs/validation/cats/',
                  '/kaggle/working/cats-v-dogs/validation/dogs/'):
    print(len(os.listdir(split_dir)))

### Model

In [None]:
# Three convolution + max-pooling stages on 150x150 RGB input, followed by a
# dense hidden layer and a single sigmoid unit for the binary decision.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation="relu",
                           input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=512, activation="relu"),
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

### Compile the Model

In [None]:
# Configure training: binary cross-entropy loss, RMSprop with a 0.001
# learning rate, and accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=RMSprop(learning_rate=0.001),
)

### Making use of ImageDataGenerator

In [None]:
TRAINING_DIR = '/kaggle/working/cats-v-dogs/training/'
VALIDATION_DIR = '/kaggle/working/cats-v-dogs/validation/'

# Both generators only multiply pixel values by 1/255; no augmentation is applied.
train_datagen = ImageDataGenerator(rescale=1.0/255.)
validation_datagen = ImageDataGenerator(rescale=1.0/255.)

# Images are resized to 150x150 and served in batches of 20 with binary labels.
train_generator = train_datagen.flow_from_directory(
    TRAINING_DIR,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=20,
)

validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=20,
)

### Fit the Model

In [None]:
# Train for five epochs, passing the validation generator as validation data;
# the returned History object is kept in `history`.
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=5,
    verbose=1,
)

In [None]:
#Predicting images in testset
%matplotlib inline

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from keras.preprocessing import image

count = 10
for filename in os.listdir('/kaggle/working/dogs-vs-cats/test1'):
    #Get the path of the images in the test directory
    print(os.path.join('/kaggle/working/dogs-vs-cats/test1', filename))
    
    path = os.path.join('/kaggle/working/dogs-vs-cats/test1', filename)
    img = image.load_img(path, target_size=(150, 150))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    
    img = mpimg.imread(path)
    plt.imshow(img)
    plt.show()
    
    images = np.vstack([x])
    classes = model.predict(images, batch_size=10)
    if classes[0]>0.5:
        print(filename + " is a dog")
    else:
        print(filename + " is a cat")
    print("--"*30)
    count -= 1
    if(count < 0):
        break