In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import shutil
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import zipfile
with zipfile.ZipFile('/kaggle/input/dogs-vs-cats/train.zip',mode='r') as z:
    z.extractall(".")
with zipfile.ZipFile('/kaggle/input/dogs-vs-cats/test1.zip',mode='r') as z:
    z.extractall(".")

In [None]:
cats,dogs = [],[]
file_names = os.listdir("./train/")
for i in file_names:
    if i.split(".")[0]=='cat':
        cats.append(i)
    else:
        dogs.append(i)

In [None]:
len(cats),len(dogs)

In [None]:
!mkdir my_dataset
!mkdir my_dataset/training_data
!mkdir my_dataset/training_data/cats
!mkdir my_dataset/training_data/dogs
!mkdir my_dataset/validation_data
!mkdir my_dataset/validation_data/cats
!mkdir my_dataset/validation_data/dogs

In [None]:
train_size = int(len(cats)*.8) #80 percentage


for f in cats[:train_size]:
    shutil.move("./train/"+f, './my_dataset/training_data/cats/')

for f in cats[train_size:]:
    shutil.move("./train/"+f, './my_dataset/validation_data/cats/')
    
for f in dogs[:train_size]:
    shutil.move("./train/"+f, './my_dataset/training_data/dogs/')

for f in dogs[train_size:]:
    shutil.move("./train/"+f, './my_dataset/validation_data/dogs/')

In [None]:
base_dir = './my_dataset/'

train_dir = os.path.join(base_dir, 'training_data')
validation_dir = os.path.join(base_dir, 'validation_data')

# Directory with our training cat/dog pictures
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')

# Directory with our validation cat/dog pictures
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')

In [None]:
train_cat_fnames = os.listdir( train_cats_dir )
train_dog_fnames = os.listdir( train_dogs_dir )

validation_cat_fnames = os.listdir( validation_cats_dir )
validation_dog_fnames = os.listdir( validation_dogs_dir )

print(train_cat_fnames[:10])
print(train_dog_fnames[:10])

In [None]:
print('total training cat images :', len(os.listdir(      train_cats_dir ) ))
print('total training dog images :', len(os.listdir(      train_dogs_dir ) ))

print('total validation cat images :', len(os.listdir( validation_cats_dir ) ))
print('total validation dog images :', len(os.listdir( validation_dogs_dir ) ))

In [None]:
%matplotlib inline

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

# Parameters for our graph; we'll output images in a 4x4 configuration
nrows = 4
ncols = 4

pic_index = 0 # Index for iterating over images

In [None]:
# Set up matplotlib fig, and size it to fit 4x4 pics
fig = plt.gcf()
fig.set_size_inches(ncols*4, nrows*4)

pic_index+=8

next_cat_pix = [os.path.join(train_cats_dir, fname) 
                for fname in train_cat_fnames[ pic_index-8:pic_index] 
               ]

next_dog_pix = [os.path.join(train_dogs_dir, fname) 
                for fname in train_dog_fnames[ pic_index-8:pic_index]
               ]

for i, img_path in enumerate(next_cat_pix+next_dog_pix):
  # Set up subplot; subplot indices start at 1
  sp = plt.subplot(nrows, ncols, i + 1)
  sp.axis('Off') # Don't show axes (or gridlines)

  img = mpimg.imread(img_path)
  plt.imshow(img)

plt.show()


Tensorflow Model

In [None]:
import tensorflow as tf
print(tf.__version__)

In [None]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(150,150,3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.summary()

In [None]:
from tensorflow.keras.optimizers import RMSprop

model.compile(optimizer=RMSprop(lr=0.001),
             loss='binary_crossentropy',
             metrics=['accuracy'])

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale= 1.0/255)
valid_datagen = ImageDataGenerator(rescale= 1.0/255)

train_generator = train_datagen.flow_from_directory(train_dir,
                                                   batch_size=20,
                                                   class_mode='binary',
                                                   target_size=(150,150))

validation_generator = valid_datagen.flow_from_directory(validation_dir,
                                                   batch_size=20,
                                                   class_mode='binary',
                                                   target_size=(150,150))

In [None]:
history = model.fit(train_generator,
                   validation_data=validation_generator,
                   steps_per_epoch=100,
                   epochs=15,
                   validation_steps=50,
                   verbose=2)

MODEL hISTORY Plotting

In [None]:
def plot_model_history(history):
    #-----------------------------------------------------------
    # Retrieve a list of list results on training and test data
    # sets for each training epoch
    #-----------------------------------------------------------
    acc      = history.history[     'accuracy' ]
    val_acc  = history.history[ 'val_accuracy' ]
    loss     = history.history[    'loss' ]
    val_loss = history.history['val_loss' ]

    epochs   = range(len(acc)) # Get number of epochs

    #------------------------------------------------
    # Plot training and validation accuracy per epoch
    #------------------------------------------------
    plt.plot  ( epochs,     acc )
    plt.plot  ( epochs, val_acc )
    plt.title ('Training and validation accuracy')
    plt.figure()

    #------------------------------------------------
    # Plot training and validation loss per epoch
    #------------------------------------------------
    plt.plot  ( epochs,     loss )
    plt.plot  ( epochs, val_loss )
    plt.title ('Training and validation loss'   )
    
    plt.show()

In [None]:
plot_model_history(history)

With Augmentation and Dropout

In [None]:
model2 = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(150,150,3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model2.compile(optimizer=RMSprop(lr=0.001),
             loss='binary_crossentropy',
             metrics=['accuracy'])

model2.summary()

In [None]:
train_datagen_aug = ImageDataGenerator(rescale=1/255,
                                  rotation_range=40,
                                  width_shift_range=0.2,
                                  height_shift_range=0.2,
                                  shear_range=0.2,
                                  zoom_range=0.2,
                                  horizontal_flip=True,
                                  fill_mode='nearest')

# NOTE: YOU MUST USE A BATCH SIZE OF 10 (batch_size=10) FOR THE 
# TRAIN GENERATOR.
train_generator_aug = train_datagen_aug.flow_from_directory(train_dir,
                                                   batch_size=10,
                                                   class_mode='binary',
                                                   target_size=(150,150))

validation_datagen_aug = ImageDataGenerator(rescale=1./255,
      rotation_range=40,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest')

# NOTE: YOU MUST USE A BACTH SIZE OF 10 (batch_size=10) FOR THE 
# VALIDATION GENERATOR.
validation_generator_aug = validation_datagen_aug.flow_from_directory(validation_dir,
                                                              batch_size=100,
                                                              class_mode='binary',
                                                              target_size=(150, 150))

In [None]:
history2 = model2.fit(train_generator_aug,
                   validation_data=validation_generator_aug,
                   steps_per_epoch=100,
                   epochs=15,
                   validation_steps=50,
                   verbose=2)

In [None]:
plot_model_history(history2)

Transfer learning

In [None]:
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3

local_weights_file = '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

pre_trained_model = InceptionV3(input_shape=(150,150,3),
                               include_top=False,
                               weights=None)

pre_trained_model.load_weights(local_weights_file)

for layer in pre_trained_model.layers:
    layer.trainable=False
    
# pre_trained_model.summary()

In [None]:
last_layer = pre_trained_model.get_layer('mixed7')
print('Last layer output shape: ',last_layer.output_shape)
last_output = last_layer.output

In [None]:
x = tf.keras.layers.Flatten()(last_output)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model3 = tf.keras.Model(pre_trained_model.input,x)
model3.compile(optimizer=RMSprop(lr=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [None]:
history3 = model.fit(
                train_generator_aug,
                validation_data = validation_generator_aug,
                steps_per_epoch=100,
                epochs=15,
                validation_steps=50,
                verbose=2)

In [None]:
plot_model_history(history_3)

From above model we pick transfer learning as best model

Test and Submission

In [None]:
test_path='./test1'
test_file=os.listdir('./test1')

In [None]:
test_df=pd.DataFrame({'file':test_file})
test_df.head()

In [None]:
test_generator = valid_datagen.flow_from_dataframe(test_df,directory=test_path,
                                                 x_col='file',
                                                 y_col=None,
                                                 class_mode=None,
                                                 target_size=(150,150),
                                                 batch_size=32,
                                                 shuffle=False)

In [None]:
predict=model.predict(test_generator)
sub = np.around(predict).astype(int)

In [None]:
# Set up matplotlib fig, and size it to fit 4x4 pics
fig = plt.gcf()
fig.set_size_inches(ncols*4, nrows*4)

pic_index+=8

my_random_list = [random.randint(0, len(test_file)) for i in range(8)]
random_list = [os.path.join(test_path,test_file[i]) for i in my_random_list]


for i, img_path in enumerate(next_cat_pix):
  # Set up subplot; subplot indices start at 1
  sp = plt.subplot(nrows, ncols, i + 1)
  sp.axis('Off') # Don't show axes (or gridlines)

  img = mpimg.imread(img_path)
  plt.imshow(img)
  label = "Dog" if sub[my_random_list[i]]==1 else "Cat"
  plt.title('Predicted: '+str(label)+'\nScore: '+str(predict[my_random_list[i]]))

plt.show()


In [None]:
submission = test_df.copy()
submission['id'] = submission['file'].str.split(".").str[0]
submission['label'] = sub
submission.drop(['file'], axis=1, inplace=True)
submission['id'] = submission['id'].astype('int')
submission = submission.sort_values(by=['id'])
submission.to_csv('submission.csv', index=False)
print("Your submission was successfully saved!")

In [None]:
submission.head()

To do:

* Callbacks
* Tensorboard
* Augmentation
* Transfer Learning