In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_file_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!unzip /kaggle/input/dogs-vs-cats/train.zip -d /kaggle/working/

In [None]:
!unzip /kaggle/input/dogs-vs-cats/test1.zip -d /kaggle/working/

## Import Libraries


In [None]:
# All Keras symbols are taken from `tensorflow.keras`: the model is a tf.keras model, and
# passing callbacks from the standalone `keras` package to it can fail on installations
# where the two packages are different implementations.
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator


## Prepare Data


In [None]:
# Build the labelled index of training images from the extracted file names.
# os.listdir() returns entries in arbitrary order, so the listing is sorted to keep the
# DataFrame row order -- and therefore the seeded train/validation split -- reproducible.
Dtrain = sorted(os.listdir('./train'))

# The label is the text before the first "."; anything that is not "dog" is labelled "cat".
# NOTE(review): assumes file names look like "<label>.<id>.jpg" -- confirm against the archive.
categories = [
    "dog" if filename.split(".")[0] == "dog" else "cat"
    for filename in Dtrain
]

df = pd.DataFrame({
    "filename": Dtrain,
    "category": categories,
})

In [None]:
# Leave the frame as the cell's last expression so the notebook renders it as a table
# instead of printing its plain-text repr.
df.head()

## Split data into training and validation sets


In [None]:
# Hold out 20% of the labelled images for validation (fixed seed), then give each part
# a fresh 0..n-1 index.
train_df, validate_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.2, random_state=42)
)

In [None]:
# Side length (in pixels) used for both image dimensions throughout the notebook.
image_width = image_height = 128


In [None]:
# VGG16 convolutional base with ImageNet weights and without its fully connected top,
# sized for 3-channel inputs of the configured image dimensions.
vgg_conv = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(image_width, image_height, 3),
)

In [None]:
# Print the layer-by-layer summary of the VGG16 base before its layers are frozen.
vgg_conv.summary()

In [None]:
# Mark every layer of the VGG16 base as non-trainable so only the layers added on top
# of it are updated during training.
for base_layer in vgg_conv.layers:
    base_layer.trainable = False

In [None]:
# Print the summary again after setting trainable=False on every layer, to check the
# trainable / non-trainable parameter counts.
vgg_conv.summary()

## Build Model

In [None]:
# Classifier: the VGG16 convolutional base followed by a small dense head.
# The generators in this notebook use class_mode='categorical' (one-hot labels over two
# classes) and predictions are decoded with argmax, so the output layer uses softmax:
# the two outputs then form a proper probability distribution over {cat, dog} instead of
# two independent sigmoid scores.
model = Sequential([
    vgg_conv,                        # VGG16 convolutional base
    Flatten(),                       # feature maps -> flat vector
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(2, activation='softmax'),  # one probability per class
])

model.summary()

In [None]:
# Compile with Adam. `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): the `decay` argument is kept to preserve the original schedule, but newer
# Keras optimizers reject it -- drop it if this notebook is moved to a recent TensorFlow.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001, decay=1e-6),
    metrics=['accuracy'],
)

### Callbacks

In [None]:
# Stop training after 10 epochs without improvement of the monitored quantity
# (EarlyStopping's default monitor is used).
earlystop = EarlyStopping(patience=10)

# Halve the learning rate when validation accuracy has not improved for 2 epochs.
# The model is compiled with metrics=['accuracy'], so the logged key is 'val_accuracy'
# (the same key the training-curve cell reads from history); with 'val_acc' the callback
# never finds its metric and never reduces the learning rate.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=2,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

callbacks = [earlystop, learning_rate_reduction]

## Training Generator


In [None]:


# Training pipeline: pixel values are scaled by 1/255 and each image is randomly
# rotated, shifted, sheared, zoomed and horizontally flipped.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Stream shuffled batches of 64 RGB images listed in train_df from ./train/,
# with one-hot ('categorical') labels taken from the 'category' column.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="./train/",
    x_col='filename',
    y_col='category',
    target_size=(image_width, image_height),
    color_mode="rgb",
    class_mode='categorical',
    batch_size=64,
    shuffle=True,
)

## Validation Generator

In [None]:
# Validation pipeline: rescaling only, no augmentation.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Batches of 64 RGB images listed in validate_df, with one-hot labels.
# shuffle=False: evaluation does not benefit from shuffling, and because validation_steps
# is floor-divided by the batch size, a fixed order means the same images are scored
# every epoch, so validation metrics are comparable between epochs.
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    "./train/",
    x_col='filename',
    y_col='category',
    target_size=(image_width, image_height),
    class_mode='categorical',
    batch_size=64,
    color_mode="rgb",
    shuffle=False
)

## Show example augmentations

In [None]:
# Pick one random training image and feed it through the training augmentation pipeline.
example_df = train_df.sample(n=1).reset_index(drop=True)
example_generator = train_datagen.flow_from_dataframe(
    dataframe=example_df,
    directory="./train/",
    x_col='filename',
    y_col='category',
    target_size=(image_width, image_height),
    color_mode="rgb",
    class_mode='categorical',
    shuffle=True,
)

# Draw eight batches from the generator and show the first image of each on a 2x4 grid.
plt.figure(figsize=(12, 12))
for position in range(1, 9):
    plt.subplot(2, 4, position)
    augmented_batch, _ = next(example_generator)
    plt.imshow(augmented_batch[0])
plt.tight_layout()
plt.show()

## Fit model

In [None]:
# Train for up to 30 epochs. Model.fit accepts generators directly; fit_generator is a
# deprecated alias for it.
# Steps are floor-divided by 64, the batch size used by both generators, so a trailing
# partial batch is not counted.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_df)//64,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=len(validate_df)//64,
    callbacks=callbacks
)


### Save model weights

In [None]:
# Write the model's weights to CatsVsDogs.h5 in the current working directory.
model.save_weights("CatsVsDogs.h5")


## Visualize Training


In [None]:
# Per-epoch metrics recorded by Keras during training.
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(accuracy))

# Accuracy and loss side by side; every curve is labelled so training and validation
# can be told apart when the notebook is skimmed.
fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

ax_accuracy.plot(epochs, accuracy, label='Training')
ax_accuracy.plot(epochs, val_accuracy, label='Validation')
ax_accuracy.set(title='Training and validation accuracy', xlabel='Epoch', ylabel='Accuracy')
ax_accuracy.legend()

ax_loss.plot(epochs, loss, label='Training')
ax_loss.plot(epochs, val_loss, label='Validation')
ax_loss.set(title='Training and validation loss', xlabel='Epoch', ylabel='Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

## prepare test data

In [None]:
# Index of the extracted test images: one row per file name found in ./test1.
Dtest = os.listdir('./test1')
df_test = pd.DataFrame(Dtest, columns=["filename"])

## Create Testing Generator

In [None]:
# Test pipeline: the same 1/255 rescaling as training and validation.
# `rescale` must be passed by keyword -- the first positional parameter of
# ImageDataGenerator is `featurewise_center`, so ImageDataGenerator(1./255) leaves the
# pixels unscaled.
test_dataGen = ImageDataGenerator(rescale=1./255)

# Unlabelled batches of 64 RGB images listed in df_test.
# shuffle=False keeps the batches in df_test row order, so row i of the prediction array
# belongs to row i of df_test when the predictions are written back into the frame.
test_gen = test_dataGen.flow_from_dataframe(
    df_test,
    './test1',
    target_size = (image_width, image_height),
    x_col = "filename",
    y_col = None,
    batch_size = 64,
    class_mode = None,
    shuffle = False,
    color_mode="rgb",

)

## predict

In [None]:
# Run the model over every test batch. Model.predict accepts generators directly;
# predict_generator is a deprecated alias for it.
predict = model.predict(test_gen)

In [None]:
# For each test image keep the index of the highest-scoring output unit.
df_test['category'] = predict.argmax(axis=-1)


In [None]:
# Translate predicted class indices back to class names using the mapping the training
# generator actually used. Keras assigns indices to class names in alphanumeric order,
# so a hand-written {1: "cat", 0: "dog"} mapping labels every prediction with the wrong
# animal; inverting class_indices cannot get out of sync with training.
index_to_label = {index: label for label, index in train_generator.class_indices.items()}
df_test["category"] = df_test["category"].replace(index_to_label)

# Bar chart of how many test images were assigned to each class.
df_test['category'].value_counts().plot.bar()
plt.ylabel("Number of images (total: {})".format(len(df_test)), fontsize = 14)
dog_count = (df_test['category'] == 'dog').sum()
cat_count = (df_test['category'] == 'cat').sum()
plt.title(str(dog_count)+" Dogs  |  "+str(cat_count)+" Cats", fontsize=18)
plt.show()

In [None]:
# Import from tensorflow.keras (as the rest of the notebook does) so ImageDataGenerator is
# not rebound to the standalone `keras` implementation halfway through the notebook.
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img


In [None]:
# Show the first 25 test images on a 5x5 grid, each captioned "<file name>(<predicted label>)".
sample_test = df_test.head(25)
plt.figure(figsize=(12, 24))
# The subplot position comes from enumerate() rather than the DataFrame index, so the grid
# stays valid even if df_test no longer has a 0..n-1 index.
for position, (_, row) in enumerate(sample_test.iterrows(), start=1):
    filename = row['filename']
    category = row['category']
    # Same relative directory the test listing and the test generator read from.
    img = load_img(os.path.join('./test1', filename), target_size=(image_width, image_height))
    plt.subplot(5, 5, position)
    plt.imshow(img)
    plt.xlabel(filename + '(' + "{}".format(category) + ')' )
plt.tight_layout()
plt.show()

## submission

In [None]:
# Build the submission frame directly from df_test (no in-place mutation, so the cell can
# be re-run safely) and write it to submission.csv with columns `id` and `label`.
# `id` is the part of the file name before the first ".".
# NOTE(review): labels are written as integers (1 = dog, 0 = cat) on the assumption that
# the competition expects numeric labels -- confirm against the sample submission file.
submission_df = pd.DataFrame({
    'id': df_test['filename'].str.split('.').str[0],
    'label': df_test['category'].replace({'dog': 1, 'cat': 0}),
})
submission_df.to_csv('submission.csv', index=False)