In [None]:
import numpy as np
import pandas as pd 
import tensorflow as tf

import matplotlib.pyplot as plt
import random
import os
# Show which files the competition dataset directory contains.
input_dir = "../input/dogs-vs-cats/"
print(os.listdir(input_dir))

In [None]:
%%time
import zipfile
# Unpack train.zip, then test1.zip, into the current working directory.
for archive_path in ("../input/dogs-vs-cats/train.zip",
                     "../input/dogs-vs-cats/test1.zip"):
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(".")

In [None]:
# Run configuration and input image geometry used throughout the notebook.
FAST_RUN = True  # the training cell uses this to choose a short or a full epoch count
IMAGE_WIDTH = 128
IMAGE_HEIGHT = 128
IMAGE_CHANNELS = 3
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)  # passed to the generators as target_size

## Preprocessing Data

In [None]:
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [None]:
# Index the extracted training images. The text before the first '.' of
# each file name is taken as that image's category.
filenames = os.listdir("/kaggle/working/train")
categories = [name.split('.')[0] for name in filenames]

files_df = pd.DataFrame({'filename': filenames, 'category': categories})

In [None]:
# Hold out 20% of the files for validation (fixed seed), and give both
# resulting frames a fresh 0..n-1 index.
train_df, validate_df = (
    subset.reset_index(drop=True)
    for subset in train_test_split(files_df, test_size=0.20, random_state=0)
)

In [None]:
# Mini-batch size shared by every generator, and the row count of each split.
batch_size = 15
total_train = len(train_df)
total_validate = len(validate_df)

## Preparing Image Data

In [None]:
# Training pipeline: pixel values are multiplied by 1/255 and each image is
# randomly rotated, sheared, zoomed, shifted and horizontally flipped.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Stream batches straight from disk using the file names listed in train_df.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="/kaggle/working/train/",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
)

In [None]:
# Validation images are only rescaled (no augmentation), so the validation
# metrics are computed on unmodified pictures.
validation_datagen = ImageDataGenerator(rescale=1./255)

validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory="/kaggle/working/train/",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size,
)

## Creating CNN Model

In [None]:
from keras.models import Sequential
from keras.applications import VGG16
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation, BatchNormalization, GlobalAveragePooling2D

In [None]:
def create_model():
    """Build and compile a small CNN that scores images against two classes.

    Architecture: two Conv2D -> BatchNormalization -> MaxPooling2D -> Dropout
    stages (32 filters, then 60 filters with stride 2), a flattened 100-unit
    dense layer with batch normalization and dropout, and a 2-unit softmax
    output. The input shape is taken from the notebook-level IMAGE_WIDTH,
    IMAGE_HEIGHT and IMAGE_CHANNELS constants.

    Returns:
        A Sequential model compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    model = Sequential()

    # Convolutional stage 1.
    model.add(Conv2D(32, (3, 3),
                     activation='relu',
                     input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Convolutional stage 2 (strided convolution on top of the pooling).
    model.add(Conv2D(60, (3, 3), activation='relu', strides=2))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # Softmax rather than sigmoid: categorical cross-entropy compares the
    # output with a one-hot label, so the two units must form a probability
    # distribution. Independent sigmoids do not sum to 1.
    model.add(Dense(2, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model

In [None]:
def createVGG16model():
    """Build and compile a transfer-learning classifier on a frozen VGG16 base.

    The ImageNet-pretrained VGG16 convolutional base (without its top layers)
    is marked non-trainable and followed by global average pooling and a
    2-unit softmax layer.

    Returns:
        A Sequential model compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    input_shape = (IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS)

    # NOTE(review): the generators in this notebook feed images scaled by
    # 1/255; confirm that matches the preprocessing VGG16's weights expect.
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    backbone.trainable = False

    model = Sequential()
    model.add(backbone)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(2, activation='softmax'))

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

### Callbacks

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
# Stop training when EarlyStopping's default monitored quantity has not
# improved for 10 consecutive epochs.
earlystop = EarlyStopping(patience=10)

# Halve the learning rate (never below 1e-5) when validation accuracy has not
# improved for 2 epochs. The monitored key must be 'val_accuracy': that is the
# name under which the 'accuracy' metric appears in history.history, as read
# by the plotting cell. The previous 'val_acc' key is never logged, so the
# scheduler had nothing to react to.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=2,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

callbacks = [earlystop, learning_rate_reduction]

## Fitting the Model

In [None]:
%%time
# FAST_RUN keeps the run short; otherwise train for the full schedule.
epochs = 3 if FAST_RUN else 30

with tf.device("/gpu:0"):
    # Swap in createVGG16model() here to train the transfer-learning variant.
    model = create_model()
    history = model.fit(
        train_generator,
        epochs=epochs,
        steps_per_epoch=total_train // batch_size,
        validation_data=validation_generator,
        # Score every epoch on the whole validation split. A single batch
        # (validation_steps=1) gives metrics too noisy for the callbacks and
        # the training curves to be meaningful.
        validation_steps=max(1, total_validate // batch_size),
        callbacks=callbacks,
    )

model.save_weights("model.h5")

## Output Graph

In [None]:
import seaborn as sns
# Per-epoch metrics recorded by model.fit.
his_dict = history.history
x_range = range(len(his_dict['loss']))

fig = plt.figure(figsize=(12, 15))
sns.set_style('darkgrid')

# Top panel: loss curves.
loss_ax = fig.add_subplot(2, 1, 1)
sns.lineplot(x=x_range, y=his_dict["val_loss"], label='Validation Loss', ax=loss_ax)
sns.lineplot(x=x_range, y=his_dict["loss"], label='Training Loss', ax=loss_ax)

# Bottom panel: accuracy curves.
accuracy_ax = fig.add_subplot(2, 1, 2)
sns.lineplot(x=x_range, y=his_dict["val_accuracy"], label='Validation Accuracy', ax=accuracy_ax)
sns.lineplot(x=x_range, y=his_dict["accuracy"], label='Training Accuracy', ax=accuracy_ax)

## Predictions

In [None]:
# Index the extracted test images.
test_filenames = os.listdir("/kaggle/working/test1")
test_df = pd.DataFrame({'filename': test_filenames})

# The text before the first '.' of each test file name.
idlist = [name.split(".")[0] for name in test_df["filename"]]

nb_samples = len(test_df)

In [None]:
# Test images get the same 1/255 rescaling and no labels. shuffle=False keeps
# the batches in test_df row order, because the predictions are written back
# to test_df by position.
test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_df,
    directory="/kaggle/working/test1",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Model.predict accepts the generator directly (predict_generator is
# deprecated), and the step count must be an integer. Rounding up makes the
# final, partially filled batch get predicted as well.
prediction_steps = int(np.ceil(nb_samples / batch_size))
predict = model.predict(test_generator, steps=prediction_steps)

In [None]:
# For every test image, keep the index of the highest-scoring output unit.
predicted_index = np.argmax(predict, axis=-1)
test_df['category'] = predicted_index

In [None]:
# Invert the generator's {class name: index} mapping so that predicted
# indices can be turned back into class names.
label_map = {index: name for name, index in train_generator.class_indices.items()}
test_df['category'] = test_df['category'].replace(label_map)

In [None]:
# Encode the class names as integers (dog -> 1, cat -> 0) for the submission.
category_codes = { 'dog': 1, 'cat': 0 }
test_df['category'] = test_df['category'].replace(category_codes)

## Submission

In [None]:
# Build the submission table: 'id' is the file name up to its first '.',
# and 'label' is the predicted category. The working columns are dropped.
submission_df = (
    test_df
    .assign(
        id=test_df['filename'].str.split('.').str[0],
        label=test_df['category'],
    )
    .drop(columns=['filename', 'category'])
)
submission_df.to_csv('submission.csv', index=False)