In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Imports**

In [None]:
import os
import zipfile
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

# Location of the zipped training images inside the read-only input directory.
local = '../input/dogs-vs-cats/train.zip'

# Unpack the archive into the current working directory.
with zipfile.ZipFile(local, 'r') as archive:
    archive.extractall()

**Get Images**

In [None]:
# The label is the part of each file name before the first '.':
# 'dog' is encoded as 1, anything else as 0.
filename = os.listdir('./train')
categories = [1 if name.split('.')[0] == 'dog' else 0 for name in filename]

# One row per image: its file name and its numeric label.
df = pd.DataFrame({'filename': filename, 'categories': categories})
df.head()
    

In [None]:
# Tally how many images fall under each label to check the class balance.
df['categories'].value_counts()

# Showing a Few Images

In [None]:
# Display ten sample images (entries 10 through 19 of the directory listing),
# each titled with the label derived from its file name.
for idx in range(10, 20):
    picture = tf.keras.preprocessing.image.load_img('./train/' + filename[idx])
    plt.imshow(picture)
    plt.title('dog' if categories[idx] == 1 else 'cat')
    plt.show()

# Model Creation

In [None]:
# CNN classifier: four Conv -> MaxPool -> Dropout stages followed by a dense head.
model = tf.keras.Sequential()

# (number of filters, dropout rate) for the Conv1..Conv4 stages, in order.
conv_stages = [(32, 0.2), (64, 0.2), (128, 0.3), (128, 0.25)]

for stage_no, (n_filters, drop_rate) in enumerate(conv_stages):
    # Only the very first layer declares the 128x128x3 input shape.
    first_layer_kwargs = {'input_shape': [128, 128, 3]} if stage_no == 0 else {}
    model.add(tf.keras.layers.Conv2D(n_filters, 3, activation='relu', padding='same',
                                     **first_layer_kwargs))
    model.add(tf.keras.layers.MaxPool2D(2))
    model.add(tf.keras.layers.Dropout(drop_rate))

# Dense classifier head
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dropout(0.25))

# The generators feed one-hot labels (class_mode='categorical') for two mutually
# exclusive classes and predictions are decoded with argmax, so the output is a
# 2-unit softmax trained with categorical cross-entropy (not sigmoid + binary
# cross-entropy, which would treat the two units as independent problems).
model.add(tf.keras.layers.Dense(2, activation='softmax'))

# COMPILING
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated.
model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(learning_rate=1e-4),
              metrics=['accuracy'])

model.summary()


In [None]:
# Stop training once the monitored quantity (EarlyStopping's default monitor)
# has failed to improve for 2 consecutive epochs.
earlystop = tf.keras.callbacks.EarlyStopping(patience=2)

# Halve the learning rate (never going below 1e-5) when validation accuracy has
# not improved for 2 epochs. The model is compiled with metrics=['accuracy'], so
# the validation metric is logged as 'val_accuracy'; monitoring 'val_acc' would
# never match a logged value and the callback would never fire.
learning_rate_reduction = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=2,
    verbose=1,
    factor=0.5,
    min_lr=0.00001)

callbacks = [earlystop, learning_rate_reduction]

# Data Preparation

In [None]:
# flow_from_dataframe with class_mode='categorical' needs string labels in y_col,
# so turn the numeric codes into class names.
# `.replace` (rather than `.map`) keeps this cell idempotent: values that are
# already 'cat'/'dog' are left untouched instead of becoming NaN on a re-run.
df['categories'] = df['categories'].replace({0: 'cat', 1: 'dog'})

In [None]:
# Preview the first rows to confirm the label column after the mapping step.
df.head()

In [None]:
# Hold out 20% of the labelled images (fixed seed for reproducibility) and give
# both resulting frames a fresh 0..n-1 index.
train_df, test_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.2, random_state=42)
)

In [None]:
# Row counts of the two splits and the mini-batch size used by every generator.
total_train, total_test = len(train_df), len(test_df)
batch_size = 20

**Train and Valid Generator**

In [None]:
# Training pipeline: pixel values are multiplied by 1/255 and lightly augmented
# with random rotations (up to 20 degrees) and horizontal flips.
train_data = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    horizontal_flip=True,
)

# Stream 128x128 images from ./train/ in batches, with one-hot encoded labels
# taken from the 'categories' column.
train_generator = train_data.flow_from_dataframe(
    dataframe=train_df,
    directory='./train/',
    x_col='filename',
    y_col='categories',
    target_size=[128, 128],
    class_mode='categorical',
    batch_size=batch_size,
)


In [None]:
# Validation pipeline: rescaling only, no augmentation.
valid_data = ImageDataGenerator(rescale=1./255)

# The held-out rows live in `test_df`; their image files are in ./train/ as well.
valid_generator = valid_data.flow_from_dataframe(
    dataframe=test_df,
    directory='./train/',
    x_col='filename',
    y_col='categories',
    target_size=[128, 128],
    class_mode='categorical',
    batch_size=batch_size,
)

In [None]:
# Train for up to 9 epochs (the callbacks may stop earlier or lower the learning rate).
# Model.fit accepts generators directly; Model.fit_generator is deprecated and
# no longer available in recent Keras releases.
history = model.fit(
    train_generator,
    epochs=9,
    steps_per_epoch=total_train // batch_size,
    validation_data=valid_generator,
    validation_steps=total_test // batch_size,
    callbacks=callbacks,
)

**Saving Parameters**

In [None]:
# Write the trained model's weights to 'model.hdf5' in the current working directory.
model.save_weights('model.hdf5')

In [None]:
import numpy as np

# Plot the per-epoch curves stored in `history`: accuracy first, then loss.
epochs = range(len(history.history['accuracy']))

# Figure 1: accuracy
plt.plot(epochs, history.history['accuracy'], 'b', label='Training Acc')
plt.plot(epochs, history.history['val_accuracy'], 'r', label='Validation Acc')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
# The legend has to be drawn before plt.figure() opens the next figure;
# otherwise the labels above are never shown on this plot.
plt.legend()

# Figure 2: loss
plt.figure()

plt.plot(epochs, history.history['loss'], 'b', label='Training Loss')
plt.plot(epochs, history.history['val_loss'], 'g', label='Validation Loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend()

plt.show()

# Preparing Test Data

In [None]:
# Unpack the unlabelled test images into the current working directory.
with zipfile.ZipFile('../input/dogs-vs-cats/test1.zip', mode='r') as archive:
    archive.extractall()

In [None]:
# One row per test image; `samples` is the number of images to predict.
filenames = os.listdir("./test1")
test_df = pd.DataFrame({'filename': filenames})
samples = len(test_df)

In [None]:
# Test pipeline: rescaling only. There are no labels (y_col/class_mode are None)
# and shuffling is off so predictions stay aligned with the rows of `test_df`.
test_data = ImageDataGenerator(rescale=1./255)
test_generator = test_data.flow_from_dataframe(
    dataframe=test_df,
    directory="./test1/",
    x_col='filename',
    y_col=None,
    class_mode=None,
    target_size=[128, 128],
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Model.predict accepts generators directly (Model.predict_generator is deprecated
# and no longer available in recent Keras releases). `steps` must be an integer,
# so the float returned by np.ceil is cast explicitly; rounding up makes sure the
# final, possibly smaller, batch is included.
predict = model.predict(test_generator, steps=int(np.ceil(samples / batch_size)))

In [None]:
# Column index of the highest score for every test image.
predicted_index = np.argmax(predict, axis=-1)

# Translate column index -> class name using the mapping the training generator
# actually used, then class name -> submission code (dog = 1, cat = 0).
# Replacing 'dog'/'cat' directly on the integer argmax output would match
# nothing and silently rely on the class index order.
index_to_label = {idx: label for label, idx in train_generator.class_indices.items()}
label_to_code = {'dog': 1, 'cat': 0}
test_df['category'] = (
    pd.Series(predicted_index, index=test_df.index)
    .map(index_to_label)
    .map(label_to_code)
)

In [None]:
# Build the submission table: `id` is the file name up to the first '.',
# `label` is the predicted category. The helper columns are dropped before writing.
submission_df = (
    test_df
    .assign(id=test_df['filename'].str.split('.').str[0],
            label=test_df['category'])
    .drop(columns=['filename', 'category'])
)
submission_df.to_csv('submission.csv', index=False)

In [None]:
# Preview the first test rows together with their predicted category.
test_df.head()