In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file in it.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile

In [None]:
# Extract the training archive into the working directory.
# The `with` block closes the archive handle even if extraction raises.
local_zip = '/kaggle/input/dogs-vs-cats/train.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/kaggle/working/dogs-vs-cats/')

In [None]:
# The training set encodes the label in each image file name (the name contains "cat" or "dog")
import os
# For every directory under the extracted training folder, print the path of
# the first file listed in it (directories without files print nothing).
for folder, _subdirs, names in os.walk('/kaggle/working/dogs-vs-cats/train'):
    if names:
        print(os.path.join(folder, names[0]))

In [None]:
# Extract the test archive into the working directory.
# The `with` block closes the archive handle even if extraction raises.
local_zip1 = '/kaggle/input/dogs-vs-cats/test1.zip'
with zipfile.ZipFile(local_zip1, 'r') as zip_ref1:
    zip_ref1.extractall('/kaggle/working/dogs-vs-cats/')

In [None]:
# The test set has no label in the image file names (neither "cat" nor "dog")
import os
# For every directory under the extracted test folder, print the path of
# the first file listed in it (directories without files print nothing).
for folder, _subdirs, names in os.walk('/kaggle/working/dogs-vs-cats/test1'):
    if names:
        print(os.path.join(folder, names[0]))

In [None]:
# Number of directory entries in the extracted training folder, then the test folder.
for extracted_dir in (
    '/kaggle/working/dogs-vs-cats/train',
    '/kaggle/working/dogs-vs-cats/test1',
):
    print(len(os.listdir(extracted_dir)))

In [None]:
# Sort the training images into separate per-class folders (train/cat and train/dog).
cat_dir = '/kaggle/working/dogs-vs-cats/train/cat'
dog_dir = '/kaggle/working/dogs-vs-cats/train/dog'

# Create each folder independently. With a single try/except around both
# mkdir calls, an already-existing `cat` folder would skip creating `dog`.
# exist_ok=True also keeps the cell re-runnable.
os.makedirs(cat_dir, exist_ok=True)
os.makedirs(dog_dir, exist_ok=True)

cdfilenames = []        # file names already handled, in processing order
seen_filenames = set()  # same names as a set, for O(1) membership checks

# os.walk also descends into train/cat and train/dog. The "already seen" check
# stops the copies found there from being copied onto themselves.
for dirname, _, filenames in os.walk('/kaggle/working/dogs-vs-cats/train'):
    for filename in filenames:
        if filename in seen_filenames:
            continue
        source_path = os.path.join(dirname, filename)
        # The class is decided by a substring match on the file name.
        if "cat" in filename:
            copyfile(source_path, os.path.join(cat_dir, filename))
        elif "dog" in filename:
            copyfile(source_path, os.path.join(dog_dir, filename))
        seen_filenames.add(filename)
        cdfilenames.append(filename)

In [None]:
# Number of images copied into the cat folder, then the dog folder.
for class_dir in (
    '/kaggle/working/dogs-vs-cats/train/cat/',
    '/kaggle/working/dogs-vs-cats/train/dog/',
):
    print(len(os.listdir(class_dir)))

In [None]:
# Directory tree for the train/validation split. Parents are listed before
# their children so that plain os.mkdir can create them in order.
dirs = [
    '/kaggle/working/cats-v-dogs',
    '/kaggle/working/cats-v-dogs/training',
    '/kaggle/working/cats-v-dogs/validation',
    '/kaggle/working/cats-v-dogs/training/cats',
    '/kaggle/working/cats-v-dogs/training/dogs',
    '/kaggle/working/cats-v-dogs/validation/cats',
    '/kaggle/working/cats-v-dogs/validation/dogs'
]
for d in dirs:
    try:
        os.mkdir(d)
        print("Created "+d+" successfully")
    except FileExistsError:
        # Re-running the cell is harmless; do not report it as a failure.
        print(d + " already exists")
    except OSError as err:
        # Keep going (best effort) but show the real cause of the failure.
        print("Failed to create " + d + ": " + str(err))

In [None]:
# Split part of the training data off into a validation set
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE, seed=None):
    """Randomly split the files in SOURCE between TRAINING and TESTING.

    Every non-empty regular file directly inside SOURCE is copied (not moved)
    into exactly one of the two destination directories. Zero-length files are
    reported and skipped; sub-directories are skipped silently.

    Args:
        SOURCE: Directory containing the files to split.
        TRAINING: Existing directory receiving the first part of the split.
        TESTING: Existing directory receiving the remaining files.
        SPLIT_SIZE: Fraction (0..1) of the usable files copied to TRAINING;
            the count is truncated with int().
        seed: Optional seed. When given, the same seed and the same set of
            files always produce the same split. When None, the global
            `random` state is used.

    Directory arguments work with or without a trailing path separator.
    """
    all_files = []

    # Sorting makes a seeded split independent of the OS listing order.
    for file_name in sorted(os.listdir(SOURCE)):
        file_path = os.path.join(SOURCE, file_name)

        if not os.path.isfile(file_path):
            # copyfile cannot copy directories, so leave them out entirely.
            continue

        if os.path.getsize(file_path):
            all_files.append(file_name)
        else:
            print('{} is zero length, so ignoring'.format(file_name))

    n_files = len(all_files)
    split_point = int(n_files * SPLIT_SIZE)

    # Sampling all n_files items without replacement is a shuffled copy.
    rng = random if seed is None else random.Random(seed)
    shuffled = rng.sample(all_files, n_files)

    train_set = shuffled[:split_point]
    test_set = shuffled[split_point:]

    for file_name in train_set:
        copyfile(os.path.join(SOURCE, file_name), os.path.join(TRAINING, file_name))

    for file_name in test_set:
        copyfile(os.path.join(SOURCE, file_name), os.path.join(TESTING, file_name))


# Per-class source folders.
CAT_SOURCE_DIR = "/kaggle/working/dogs-vs-cats/train/cat/"
DOG_SOURCE_DIR = "/kaggle/working/dogs-vs-cats/train/dog/"
# Destination folders for the training part of the split.
TRAINING_CATS_DIR = "/kaggle/working/cats-v-dogs/training/cats/"
TRAINING_DOGS_DIR = "/kaggle/working/cats-v-dogs/training/dogs/"
# Destination folders for the validation part of the split.
TESTING_CATS_DIR = "/kaggle/working/cats-v-dogs/validation/cats/"
TESTING_DOGS_DIR = "/kaggle/working/cats-v-dogs/validation/dogs/"

# Fraction of each class that goes to training; the rest goes to validation.
split_size = .9
for source_dir, training_dir, testing_dir in (
    (CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR),
    (DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR),
):
    split_data(source_dir, training_dir, testing_dir, split_size)

In [None]:
# Number of files in each split folder: training cats/dogs, then validation cats/dogs.
for split_dir in (
    '/kaggle/working/cats-v-dogs/training/cats/',
    '/kaggle/working/cats-v-dogs/training/dogs/',
    '/kaggle/working/cats-v-dogs/validation/cats/',
    '/kaggle/working/cats-v-dogs/validation/dogs/',
):
    print(len(os.listdir(split_dir)))

In [None]:
# KERAS MODEL TO CLASSIFY CATS V DOGS
# 3 CONVOLUTION LAYERS
model = tf.keras.models.Sequential([
    # Input: 150x150 images with 3 color channels
    tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # Flatten the feature maps to feed into the dense layers
    tf.keras.layers.Flatten(),
    # 512 neuron hidden layer
    tf.keras.layers.Dense(512, activation='relu'),
    # Single sigmoid output in the range 0-1: 0 for one class ('cats'), 1 for the other ('dogs')
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# `learning_rate` is the supported keyword. The legacy `lr` alias is deprecated
# and rejected by newer Keras releases.
model.compile(optimizer=RMSprop(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Root folders of the split; each contains one sub-folder per class.
TRAINING_DIR = '/kaggle/working/cats-v-dogs/training/'
VALIDATION_DIR = '/kaggle/working/cats-v-dogs/validation/'

# Both generators only multiply every pixel value by 1/255 (no augmentation).
train_datagen = ImageDataGenerator(rescale=1.0 / 255.0)
validation_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Stream batches of 20 images resized to 150x150 with binary (0/1) labels.
train_generator = train_datagen.flow_from_directory(
    TRAINING_DIR,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=20,
)

validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=20,
)

In [None]:
# Train the baseline CNN for 5 epochs and evaluate on the validation generator
# after each epoch. The returned History object holds the per-epoch metrics.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=5,
    verbose=1,
)

In [None]:
# PLOTTING LOSS AND ACCURACY
%matplotlib inline

import matplotlib.image  as mpimg
import matplotlib.pyplot as plt

#-----------------------------------------------------------
# Retrieve the per-epoch metric lists recorded while training
# the baseline model (training and validation sets)
#-----------------------------------------------------------
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))  # epoch indices, used as the x axis

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
# The series name must be passed as `label=`. A fourth positional string is
# parsed by matplotlib as another data series, not as a legend label.
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.figure()  # start a new figure for the loss curves

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
#Predicting images in testset
%matplotlib inline

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing import image

# Classify a handful of test images with the baseline CNN (`model`) and show each one.
count = 10          # number of test images to predict and display
remaining = count   # images still to process
for dirname, _, filenames in os.walk('/kaggle/working/dogs-vs-cats/test1'):
    for filename in filenames:
        path = os.path.join(dirname, filename)
        print(path)
        # Resize to the 150x150 input size the network was built for.
        img = image.load_img(path, target_size=(150, 150))
        # Apply the same 1/255 rescaling the training generator used, so the
        # network sees inputs on the scale it was trained on.
        x = image.img_to_array(img) / 255.0
        # Add the leading batch dimension expected by predict().
        images = np.expand_dims(x, axis=0)
        # Display the original (un-resized) image.
        plt.imshow(mpimg.imread(path))
        plt.show()
        classes = model.predict(images, batch_size=10)
        # Sigmoid output: values above 0.5 are treated as 'dog', otherwise 'cat'.
        if classes[0][0] > 0.5:
            print(filename + " is a dog")
        else:
            print(filename + " is a cat")
        print("--"*30)
        remaining -= 1
        if remaining <= 0:
            break
    if remaining <= 0:
        # The inner break only leaves the current directory; stop the walk too.
        break

In [None]:
#Transfer learning using Inception v3
# Download the Inception v3 weights (the "notop" file, without the classifier head).
# TLS certificate verification is left enabled so the weights cannot be
# swapped by a man-in-the-middle during download.
!wget \
    https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5

# Import the inception model  
from tensorflow.keras.applications.inception_v3 import InceptionV3

# Location of the weights file fetched by the wget command above.
local_weights_file = '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

# Build the InceptionV3 network without its top classification layers and
# without letting Keras fetch weights (weights=None), then load them from disk.
pre_trained_model = InceptionV3(
    weights=None,
    include_top=False,
    input_shape=(150, 150, 3),
)
pre_trained_model.load_weights(local_weights_file)

# Freeze every pre-trained layer so training does not update its weights.
for frozen_layer in pre_trained_model.layers:
    frozen_layer.trainable = False

# Show the architecture of the frozen base network.
pre_trained_model.summary()

In [None]:
# Take the output of the 'mixed7' layer as the feature extractor output.
last_layer = pre_trained_model.get_layer('mixed7')
# Read the shape from the output tensor: `Layer.output_shape` is not available
# in newer Keras releases, while `output.shape` works across versions.
print('last layer output shape: ', last_layer.output.shape)
last_output = last_layer.output

In [None]:
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.optimizers import RMSprop

# Classification head stacked on top of the frozen InceptionV3 features.
# Flatten the output layer to 1 dimension
x = layers.Flatten()(last_output)
# Add a fully connected layer with 1,024 hidden units and ReLU activation
x = layers.Dense(1024, activation='relu')(x)
# Add a dropout rate of 0.2
x = layers.Dropout(.2)(x)
# Add a final sigmoid layer for classification
x = layers.Dense(1, activation='sigmoid')(x)

# Full model: InceptionV3 input -> frozen base -> new classification head
model1 = Model(pre_trained_model.input, x)

model1.compile(
    # `learning_rate` is the supported keyword. The legacy `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer=RMSprop(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model1.summary()

In [None]:
# Root folders of the split; each contains one sub-folder per class.
TRAINING_DIR = '/kaggle/working/cats-v-dogs/training/'
VALIDATION_DIR = '/kaggle/working/cats-v-dogs/validation/'

# Training images are rescaled by 1/255 and randomly augmented
# (rotation, shifts, shear, zoom, horizontal flip).
train_datagen1 = ImageDataGenerator(
    rescale=1. / 255,
    horizontal_flip=True,
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',
)

# Validation images are only rescaled, never augmented.
validation_datagen1 = ImageDataGenerator(rescale=1 / 255)

# Stream batches of 20 images resized to 150x150 with binary (0/1) labels.
train_generator1 = train_datagen1.flow_from_directory(
    TRAINING_DIR,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=20,
)

validation_generator1 = validation_datagen1.flow_from_directory(
    VALIDATION_DIR,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=20,
)

In [None]:
class myCallback(tf.keras.callbacks.Callback):
    """Stop training as soon as the training accuracy exceeds 99.9%."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the accuracy logged for the finished epoch.

        Args:
            epoch: Index of the epoch that just ended (unused).
            logs: Dict of metric values for this epoch; may be None.
        """
        # Avoid a mutable default argument, and tolerate a missing 'accuracy'
        # entry instead of raising TypeError on `None > 0.999`.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > 0.999:
            print("\nReached 99.9% accuracy so cancelling training!")
            self.model.stop_training = True
        
# Early-stopping callback instance passed to training below.
callbacks = myCallback()
# Train the transfer-learning model. `Model.fit` accepts generators directly;
# `fit_generator` is deprecated and has been removed from newer Keras releases.
# This also matches how the baseline model is trained earlier in the notebook.
history1 = model1.fit(
    train_generator1,
    epochs=5,
    validation_data=validation_generator1,
    callbacks=[callbacks]
)

In [None]:
# PLOTTING LOSS AND ACCURACY
%matplotlib inline

import matplotlib.image  as mpimg
import matplotlib.pyplot as plt

#-----------------------------------------------------------
# Retrieve the per-epoch metric lists recorded while training
# the transfer-learning model (training and validation sets)
#-----------------------------------------------------------
acc = history1.history['accuracy']
val_acc = history1.history['val_accuracy']
loss = history1.history['loss']
val_loss = history1.history['val_loss']

epochs = range(len(acc))  # epoch indices, used as the x axis

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
# The series name must be passed as `label=`. A fourth positional string is
# parsed by matplotlib as another data series, not as a legend label.
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.figure()  # start a new figure for the loss curves

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
#Predicting images in testset
%matplotlib inline

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing import image

# Classify a handful of test images with the transfer-learning model (`model1`)
# and show each one.
count = 10          # number of test images to predict and display
remaining = count   # images still to process
for dirname, _, filenames in os.walk('/kaggle/working/dogs-vs-cats/test1'):
    for filename in filenames:
        path = os.path.join(dirname, filename)
        print(path)
        # Resize to the 150x150 input size the network was built for.
        img = image.load_img(path, target_size=(150, 150))
        # Apply the same 1/255 rescaling the training generator used, so the
        # network sees inputs on the scale it was trained on.
        x = image.img_to_array(img) / 255.0
        # Add the leading batch dimension expected by predict().
        images = np.expand_dims(x, axis=0)
        # Display the original (un-resized) image.
        plt.imshow(mpimg.imread(path))
        plt.show()
        classes = model1.predict(images, batch_size=10)
        # Sigmoid output: values above 0.5 are treated as 'dog', otherwise 'cat'.
        if classes[0][0] > 0.5:
            print(filename + " is a dog")
        else:
            print(filename + " is a cat")
        print("--"*30)
        remaining -= 1
        if remaining <= 0:
            break
    if remaining <= 0:
        # The inner break only leaves the current directory; stop the walk too.
        break