In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
import zipfile

# Archives shipped with the dataset; each one is extracted into the current
# working directory.
files_to_unzip = ['test1', 'train']

for archive_name in files_to_unzip:
    archive_path = "../input/dogs-vs-cats/{}.zip".format(archive_name)
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(".")
        print("{} unzipped".format(archive_name))

In [None]:
# Build a (filename, category) table for the extracted training images.
# The label is derived from the file-name prefix: 'dog.*' -> 1, anything else -> 0.
filenames = os.listdir("../working/train")

categories = [
    1 if file_name.split('.')[0] == 'dog' else 0
    for file_name in filenames
]

df = pd.DataFrame({'filename': filenames, 'category': categories})

df.head()

In [None]:
import seaborn as sns

# Bar chart of how many rows fall into each category value.
sns.countplot(data=df, x='category')

### Show a random sample image from the training set

In [None]:
from tensorflow.keras.preprocessing.image import load_img, ImageDataGenerator
import random
import matplotlib.pyplot as plt

# Pick one training file at random and display it.
sample = random.choice(filenames)

sample_path = os.path.join("../working/train/", sample)
sample_image = load_img(sample_path)

plt.imshow(sample_image)

In [None]:
# Side length (in pixels) of the square images fed to the networks.
image_size = 128

# Height x width x 3 colour channels.
input_shape = (image_size,) * 2 + (3,)



In [None]:
def _conv_block(filters, **conv_kwargs):
    """Return one Conv2D -> BatchNormalization -> MaxPool2D -> Dropout(0.25) stage.

    Extra keyword arguments are forwarded to the Conv2D layer (used to pass
    ``input_shape`` to the first stage).
    """
    return [
        layers.Conv2D(filters=filters, kernel_size=3, activation='relu', **conv_kwargs),
        layers.BatchNormalization(),
        layers.MaxPool2D(),
        layers.Dropout(0.25),
    ]


# Small CNN baseline: three convolutional stages with 32/64/128 filters,
# followed by a 512-unit dense layer and a single sigmoid output unit.
simple_model = keras.Sequential([
    *_conv_block(32, input_shape=input_shape),
    *_conv_block(64),
    *_conv_block(128),

    layers.Flatten(),

    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.25),

    layers.Dense(1, activation='sigmoid'),
])

simple_model.summary()

In [None]:
# Binary cross-entropy matches the single sigmoid output unit of simple_model.
simple_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:


# Swap the numeric labels for string labels. Re-running this cell is harmless:
# once the column holds 'cat'/'dog' there is nothing left to replace.
# NOTE(review): presumably done because flow_from_dataframe with
# class_mode='binary' wants string labels - confirm against the Keras docs.
label_names = {0: 'cat', 1: 'dog'}
df["category"] = df["category"].replace(label_names)



In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled images for validation.
#  - random_state pins the shuffle so the split (and every metric computed
#    from it) is reproducible across "Restart & Run All".
#  - stratify keeps the cat/dog ratio the same in both subsets.
#  - drop=True discards the old row labels instead of leaking them into the
#    frames as a spurious 'index' column.
train_df, validate_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['category'],
)
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)

In [None]:
# Random augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

# Pixel values are additionally scaled by 1/255.
train_datagen = ImageDataGenerator(rescale=1.0 / 255, **augmentation_options)

In [None]:
# Validation images get the same 1/255 scaling as training images, but no
# augmentation.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Images per batch; also used to derive the per-epoch step counts passed to fit().
batch_size = 150

In [None]:
# Stream augmented training batches from disk.
# target_size is derived from image_size so the generator output always
# matches the input_shape the models were built with; a hard-coded (128, 128)
# would silently diverge if image_size were changed.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="../working/train/",
    x_col='filename',
    y_col='category',
    class_mode='binary',
    target_size=(image_size, image_size),
    batch_size=batch_size,
)

In [None]:
# Stream validation batches from disk (rescaling only, no augmentation).
#  - target_size is derived from image_size so it always matches the models'
#    input_shape instead of relying on a duplicated (128, 128) literal.
#  - shuffle=False keeps the batch order fixed. fit() is called with
#    validation_steps = len(validate_df) // batch_size, which can leave out a
#    final partial batch; with shuffling on, a different subset would be
#    scored every epoch and val_loss / val_accuracy would not be comparable
#    between epochs.
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory="../working/train/",
    x_col='filename',
    y_col='category',
    class_mode='binary',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=False,
)

### Preview augmented versions of one sample training image

In [None]:
# Draw a single random training row and wrap it in a generator that uses the
# training augmentations, so repeated draws give different variants of the
# same picture. target_size, class_mode and batch_size are left at the
# flow_from_dataframe defaults.
example_df = train_df.sample(n=1).reset_index(drop=True)

example_generator = train_datagen.flow_from_dataframe(
    example_df,
    "../working/train",
    x_col='filename',
    y_col='category',
)



In [None]:
# 3 x 3 grid: each panel shows the first image of a freshly drawn batch from
# example_generator.
plt.figure(figsize=(12, 12))
for position in range(1, 10):
    plt.subplot(3, 3, position)
    X_batch, Y_batch = next(example_generator)
    image = X_batch[0]
    plt.imshow(image)
plt.tight_layout()
plt.show()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once val_loss has not improved for `patience` consecutive
# epochs. restore_best_weights=True rolls the model back to the best epoch;
# without it a triggered stop leaves the model with weights from `patience`
# epochs after the best one.
# NOTE(review): the fit() calls in this notebook run for 5 and 10 epochs, so a
# patience of 10 cannot be reached as written. It only becomes active with the
# 20-epoch setting hinted at in the fit() comments - lower it if early
# stopping should apply to the short runs.
earlystop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

callbacks = [earlystop]


In [None]:


# Number of full batches per epoch; integer division leaves out a final
# partial batch.
train_steps = train_df.shape[0] // batch_size
val_steps = validate_df.shape[0] // batch_size

# Train the baseline CNN and keep the per-epoch metrics for plotting.
history = simple_model.fit(
    train_generator,
    epochs=5,  # 20
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=val_steps,
    callbacks=callbacks,
)



In [None]:
# One row per epoch, one column per tracked metric.
history_frame = pd.DataFrame(history.history)

# Training vs. validation curves: loss first, then accuracy.
history_frame[['loss', 'val_loss']].plot()
history_frame[['accuracy', 'val_accuracy']].plot()

## Transfer learning with VGG16

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16

# VGG16 convolutional base with ImageNet weights and without its classifier top.
# NOTE(review): the generators in this notebook scale pixels by 1/255; VGG16
# ships its own preprocess_input - confirm whether that should be used instead.
pretrained_base = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape,
)

# Freeze the base so that only the layers added on top of it are trained.
pretrained_base.trainable = False

In [None]:
# Classification head stacked on the frozen VGG16 base:
# Flatten -> Dense(512, relu) -> Dropout(0.5) -> Dense(1, sigmoid).
head_layers = [
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
]

x = pretrained_base.output
for head_layer in head_layers:
    x = head_layer(x)

model = keras.models.Model(pretrained_base.input, x)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Number of full batches per epoch; integer division leaves out a final
# partial batch.
vgg_train_steps = train_df.shape[0] // batch_size
vgg_val_steps = validate_df.shape[0] // batch_size

# Train the VGG16-based model on the same generators as the baseline CNN.
history_vgg = model.fit(
    train_generator,
    epochs=10,  # 20
    steps_per_epoch=vgg_train_steps,
    validation_data=validation_generator,
    validation_steps=vgg_val_steps,
    callbacks=callbacks,
)

In [None]:
# One row per epoch, one column per tracked metric, for the VGG16 run.
history_vgg_frame = pd.DataFrame(history_vgg.history)

# Training vs. validation curves: loss first, then accuracy.
history_vgg_frame[['loss', 'val_loss']].plot()
history_vgg_frame[['accuracy', 'val_accuracy']].plot()