In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))
        
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import random

# Sklearn
from sklearn.model_selection import train_test_split

# keras
from keras_preprocessing.image import ImageDataGenerator, load_img
from keras.utils import to_categorical



Unzip the folders

In [None]:
import zipfile
# Archives shipped with the dataset; each one is extracted into the current directory.
zip_files = ['test1', 'train']

for archive_name in zip_files:
    archive_path = "../input/dogs-vs-cats/{}.zip".format(archive_name)
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(".")
        print("{} unzipped".format(archive_name))

In [None]:
# Show what the dataset directory contains.
input_dir = "../input/dogs-vs-cats"
print(os.listdir(input_dir))

In [None]:
# Directory holding the extracted training images.
train_folder_path = "../working/train"
# os.listdir returns entries in arbitrary order; sorting makes the later
# seeded train/validation split reproducible across runs and machines.
filenames = sorted(os.listdir(train_folder_path))

In [None]:
# Derive the label from the text before the first dot in each filename:
# 'dog' -> 1, anything else -> 0.
categories = [1 if name.split(".")[0] == 'dog' else 0 for name in filenames]

df = pd.DataFrame({'filename': filenames, 'category': categories})

In [None]:
# Peek at the first rows of the filename/category table.
df.head()

In [None]:
# Peek at the last rows of the filename/category table.
df.tail()

In [None]:
# Number of images per label (1 = dog, 0 = everything else).
df['category'].value_counts()

In [None]:
# (rows, columns) of the label table: one row per training file.
df.shape

In [None]:
# Bar chart of the number of images per label.
df['category'].value_counts().plot.bar()

## See sample image

In [None]:
# Choose the sample before using it. The original first cell referenced
# `sample` ahead of the cell that defined it, which fails on a fresh kernel.
sample = random.choice(filenames)
sample_path = train_folder_path + "/" + sample
print(sample_path)

# Load and display the randomly chosen training image.
image = load_img(sample_path)
plt.imshow(image)

# Build Model Framework 

In [None]:
# Size every image is resized to; also used as the network's input shape.
img_wd = 128
img_ht = 128

In [None]:
# import layers from keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization


In [None]:
# CNN classifier: three Conv -> BatchNorm -> MaxPool -> Dropout stages with
# 32, 64 and 128 filters, followed by a dense head with a 2-way softmax.
model = Sequential()

for stage, n_filters in enumerate((32, 64, 128)):
    # Only the very first layer declares the input shape.
    conv_kwargs = {'input_shape': (img_wd, img_ht, 3)} if stage == 0 else {}
    model.add(Conv2D(n_filters, (3, 3), activation='relu', **conv_kwargs))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

# Dense head: flatten the feature maps, one hidden layer, then the output layer.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Dense(2, activation='softmax'))

# Compile for one-hot targets and report accuracy during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
model.summary()

## Callbacks

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [None]:
# Stop training once the validation loss (the callback's default monitor)
# has not improved for 15 epochs.
earlystop = EarlyStopping(monitor='val_loss', patience=15)

**Learning Rate Reduction**    
We will halve the learning rate when the validation accuracy has not improved for 2 consecutive epochs.

In [None]:
# Halve the learning rate when validation accuracy stalls for 2 epochs, down to
# a floor of 1e-5. The model is compiled with metrics=['accuracy'], so the
# logged key is 'val_accuracy'; the previous 'val_acc' is never logged and the
# callback could not act on it. mode='max' states that higher is better.
lr_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                 mode='max',
                                 patience=2,
                                 verbose=1,
                                 factor=0.5,
                                 min_lr=0.00001)

In [None]:
# Callbacks handed to model.fit: early stopping plus learning-rate reduction.
callbacks = [earlystop, lr_reduction]

## Data Preparation

Because we will use the image generator with class_mode="categorical", we need to convert the category column to strings. The image generator will then one-hot encode it, which is what our classifier expects.

So we will convert 1 to dog and 0 to cat

In [None]:
# Replace the numeric labels with class names so the image generator can
# one-hot encode them (class_mode='categorical').
category_names = {0: 'cat', 1: 'dog'}
df['category'] = df['category'].map(category_names)

In [None]:
# Hold out 20% of the labelled files for validation (seeded), then give both
# parts a fresh 0..n-1 index.
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.2, random_state=42)
)

In [None]:
# Class balance of the training split.
train_df['category'].value_counts().plot.bar()

In [None]:
# Class balance of the validation split.
validate_df['category'].value_counts().plot.bar()

In [None]:
# Row counts of each split, and the batch size used for the generators and
# for the steps-per-epoch arithmetic.
train_size = len(train_df)
val_size = len(validate_df)
batch_size = 200

# Image Generator for data augmentation

## Train data generator

In [None]:
# Augmenting generator for training: random rotation, shear, zoom, shifts and
# horizontal flips, with pixel values rescaled to [0, 1].
train_datagen = ImageDataGenerator(rotation_range= 15, 
                                  rescale= 1/255,
                                  shear_range= 0.1,
                                  zoom_range=0.2,
                                  horizontal_flip=True,
                                  width_shift_range = 0.1,
                                  height_shift_range=0.1)
# batch_size must be passed explicitly: model.fit computes steps_per_epoch as
# train_size // batch_size, so leaving the generator on its default of 32
# meant each epoch covered only a fraction of the training set.
train_generator = train_datagen.flow_from_dataframe(train_df, 
                                                   "../working/train/",
                                                   x_col = 'filename', 
                                                   y_col = 'category', 
                                                   target_size = (img_wd,img_ht),
                                                   class_mode = 'categorical',
                                                   batch_size = batch_size)

## Validation Data Generator

In [None]:
# Validation images are only rescaled; no augmentation is applied.
valid_datagen = ImageDataGenerator(rescale=1/255)

valid_flow_options = dict(
    x_col='filename',
    y_col='category',
    target_size=(img_wd, img_ht),
    class_mode='categorical',
    batch_size=batch_size,
)
valid_generator = valid_datagen.flow_from_dataframe(
    validate_df,
    "../working/train/",
    **valid_flow_options
)


## Let's see how the generator works

In [None]:
# One randomly drawn training image, fed through the augmenting generator so
# that the effect of the random transforms can be inspected.
example_df = train_df.sample(n=1).reset_index(drop=True)
example_gen = train_datagen.flow_from_dataframe(
    dataframe=example_df,
    directory="../working/train/",
    x_col='filename',
    y_col='category',
    target_size=(img_wd, img_ht),
    class_mode='categorical',
)

In [None]:
# Draw 20 augmented variants of the example image in a 5x4 grid.
plt.figure(figsize=(12, 12))
for position in range(1, 21):
    plt.subplot(5, 4, position)
    # Each draw from the generator yields a freshly augmented batch.
    augmented_batch, _labels = next(iter(example_gen))
    plt.imshow(augmented_batch[0])
plt.tight_layout()

# Model Fitting

In [None]:
# Set Fast_run to True for a quick 3-epoch smoke run; otherwise train for 50.
Fast_run = False
if Fast_run:
    epochs = 3
else:
    epochs = 50

# The result keeps the name `histroy` because the plotting cell reads it.
histroy = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=train_size // batch_size,
    validation_data=valid_generator,
    validation_steps=val_size // batch_size,
    callbacks=callbacks,
)

# Save Model

In [None]:
# Persist the trained model to 'model.h5' in the current directory.
model.save('model.h5')

## Visualize Training

In [None]:
# Training curves: loss on the top axes, accuracy on the bottom axes.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize =(12,12))

# Early stopping can end training before `epochs`, so the x ticks are sized
# from the recorded history. The curves are plotted against indices 0..n-1.
epochs_run = len(histroy.history['loss'])
epoch_ticks = np.arange(0, epochs_run, 1)

ax1.plot(histroy.history['loss'], color = 'b', label = 'Training loss')
ax1.plot(histroy.history['val_loss'], color = 'r', label = 'validation loss')
ax1.set_xticks(epoch_ticks)
ax1.set_yticks(np.arange(0,1,0.1))
# plt.legend only affects the current (last) axes, so each axes gets its own.
ax1.legend(loc = 'best', shadow = True)

ax2.plot(histroy.history['accuracy'], color = 'b', label = 'Training acc')
ax2.plot(histroy.history['val_accuracy'],color = 'r', label = 'validation acc')
ax2.set_xticks(epoch_ticks)
ax2.legend(loc = 'best', shadow = True)

plt.tight_layout()

## Preparing Test Data

In [None]:
# Collect the extracted test images into a one-column table.
test_folder_path = "../working/test1"
test_filenames = os.listdir(test_folder_path)

test_df = pd.DataFrame(test_filenames, columns=['filename'])
test_size = len(test_df)

## Creating Test Generators

In [None]:
# Unlabelled, unshuffled generator over the test images, so predictions come
# back in the same order as the rows of test_df.
test_datagen = ImageDataGenerator(rescale=1/255)

test_flow_options = dict(
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=(img_wd, img_ht),
    batch_size=batch_size,
    shuffle=False,
)
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    "../working/test1/",
    **test_flow_options
)

# Predict

In [None]:
# Enough batches to cover every test image, including a final partial batch.
# np.ceil returns a float, while `steps` is a batch count, so cast it to int.
n_test_steps = int(np.ceil(test_size / batch_size))
predict = model.predict(test_generator, steps = n_test_steps)

For categorical classification, the prediction contains a probability for each category. We pick the category with the highest probability using NumPy's `argmax`.

In [None]:
# Index of the highest-probability class for every test image.
test_df['category'] = predict.argmax(axis=-1)

In [None]:
# Raw model output: per-image scores from the 2-unit softmax layer.
predict

In [None]:
# First test rows; 'category' currently holds the predicted class index.
test_df.head()

In [None]:
# Invert the generator's {class name: index} mapping so predicted indices can
# be translated back to class names.
label_map = {index: name for name, index in train_generator.class_indices.items()}

test_df['category'] = test_df['category'].map(label_map)

In [None]:
# Convert class names to numeric labels: cat -> 0, dog -> 1.
label_to_id = {'cat': 0, 'dog': 1}
test_df['category'] = test_df['category'].map(label_to_id)

In [None]:
# First test rows again, now with 'category' as the numeric label.
test_df.head()

In [None]:
# How many test images were predicted for each label.
test_df['category'].value_counts()

## See predicted results with images

In [None]:
# Show the first 15 test images in a 5x3 grid, each captioned with its
# filename and predicted label.
sample_test = test_df.head(15)
plt.figure(figsize = (12,24))

# Panels are numbered by position, not by the DataFrame index label, so the
# grid still works if test_df has been filtered, shuffled or re-indexed.
for position, row in enumerate(sample_test.itertuples(index=False), start=1):
    img = load_img("../working/test1/" + row.filename,
                   target_size = (img_wd, img_ht))
    plt.subplot(5, 3, position)
    plt.imshow(img)
    plt.xlabel(row.filename + '(' + "{}".format(row.category) + ")")

plt.tight_layout()

# Submission

In [None]:
# Build the submission table from a copy of the predictions: add an 'id'
# column (the filename up to its first dot) and rename 'category' to 'label'.
submission_df = (
    test_df
    .assign(id=test_df['filename'].str.split(".").str[0])
    .rename(columns={'category': 'label'})
)

In [None]:
# Keep only the submission columns, in id/label order. Selecting columns is
# safe to re-run, whereas the previous in-place drop raised KeyError the
# second time and left the columns ordered as label, id.
submission_df = submission_df[['id', 'label']]

In [None]:
# Write the submission file without the DataFrame index column.
submission_df.to_csv('submission.csv', index = False)