In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file in input_files:
    print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from zipfile import ZipFile
import shutil
import random
import cv2
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

### Extract and read images

In [None]:
# Zipped train and test archives, both located in the same input folder.
INPUT_DIR = '/kaggle/input/dogs-vs-cats'
train_path = f'{INPUT_DIR}/train.zip'
test_path = f'{INPUT_DIR}/test1.zip'

In [None]:
# Folder (relative to the working directory) that receives the unpacked images.
DATA_PATH = './dogs-vs-cats'

# Unpack the train archive first, then the test archive, into DATA_PATH.
for archive_path in (train_path, test_path):
    with ZipFile(archive_path) as archive:
        archive.extractall(DATA_PATH)

In [None]:
# Report how many entries each extracted folder contains.
for split_name, folder in (('training', 'train'), ('test', 'test1')):
    n_images = len(os.listdir(f'{DATA_PATH}/{folder}'))
    print(f'Number of {split_name} images: {n_images}')

### Display a few images

In [None]:
# Folder holding the extracted training images.
path = DATA_PATH + '/train'

# Preview the first ten directory entries on a 2 x 5 grid.
preview_files = os.listdir(path)[:10]
fig, axis = plt.subplots(2, 5, figsize = (14, 5))
for panel, file_name in zip(axis.ravel(), preview_files):
    bgr_image = cv2.imread(os.path.join(path, file_name))
    # OpenCV loads images as BGR; convert to RGB before handing them to matplotlib.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    panel.imshow(cv2.resize(rgb_image, (224, 224)))
    # Hide both axes so only the picture is visible.
    panel.xaxis.set_visible(False)
    panel.yaxis.set_visible(False)
plt.tight_layout()

### Prepare training and validation data

In [None]:
# Every entry in the training folder becomes one row of the dataframe.
train_files = os.listdir(path)

# The part of the file name before the first '.' decides the label:
# 'dog' is encoded as 1, anything else as 0.
labels = [1 if name.split('.')[0] == 'dog' else 0 for name in train_files]

train_df = pd.DataFrame({'file_names': train_files, 'labels': labels})

In [None]:
# Show ten randomly chosen rows as a sanity check of the file name / label pairing.
train_df.sample(n = 10)

In [None]:
# Bar chart of how many training rows carry each label.
train_label_counts = train_df['labels'].value_counts()
train_label_counts.plot.bar(title = 'Number of images in each category',
                            xlabel = 'category', ylabel = 'count');

In [None]:
# Swap the numeric codes for readable class names, then show the first rows.
code_to_name = {0: 'cat', 1: 'dog'}
train_df['labels'] = train_df['labels'].replace(code_to_name)
train_df.head()

In [None]:
# Stratified split (12% held out for validation, fixed seed), then renumber each part from zero.
split_parts = train_test_split(train_df, test_size = 0.12, stratify = train_df['labels'], random_state = 42)
df_train, df_val = (part.reset_index(drop = True) for part in split_parts)

In [None]:
# Report the (rows, columns) shape of both splits, one per line.
print(f'Shape of df_train: {df_train.shape}', f'Shape of df_val: {df_val.shape}', sep = '\n')

### Check the distribution of images in the validation dataframe

In [None]:
# Bar chart of how many validation rows carry each label.
val_label_counts = df_val['labels'].value_counts()
val_label_counts.plot.bar(title = 'Number of images in each category',
                          xlabel = 'category', ylabel = 'count');

In [None]:
# Target image size (passed as target_size) and batch size used by the image generators.
IMAGE_SIZE, BATCH_SIZE = (224, 224), 64

### Create training and validation generators

In [None]:
# Augmentation settings for the training images: pixel rescaling plus random
# rotation, zoom, shifts, shear, brightness changes and horizontal flips.
# NOTE(review): both bounds of brightness_range are below 1.0, which presumably
# darkens every training image while validation images are left untouched —
# confirm this is intended.
train_aug_options = dict(
    rescale = 1 / 255.0,
    rotation_range = 30,
    zoom_range = 0.2,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    brightness_range = (0.1, 0.9),
    fill_mode = 'nearest',
    horizontal_flip = True,
)
train_aug = ImageDataGenerator(**train_aug_options)

# Stream batches of augmented training images, reading file names and labels from df_train.
train_gen = train_aug.flow_from_dataframe(
    dataframe = df_train,
    directory = path,
    x_col = 'file_names',
    y_col = 'labels',
    class_mode = 'categorical',
    target_size = IMAGE_SIZE,
    batch_size = BATCH_SIZE,
)

In [None]:
# Validation images are only rescaled to [0, 1]; no augmentation is applied.
val_aug = ImageDataGenerator(rescale = 1 / 255.0)

# Stream batches of validation images, reading file names and labels from df_val.
val_gen = val_aug.flow_from_dataframe(
    dataframe = df_val,
    directory = path,
    x_col = 'file_names',
    y_col = 'labels',
    class_mode = 'categorical',
    target_size = IMAGE_SIZE,
    batch_size = BATCH_SIZE,
)

### Display a random augmented image

In [None]:
# Pick one random row and wrap it in its own generator that uses the training
# augmentation, so the same picture can be drawn repeatedly with different transforms.
random_image = train_df.sample(n = 1).reset_index(drop = True)
random_gen = train_aug.flow_from_dataframe(
    dataframe = random_image,
    directory = path,
    x_col = 'file_names',
    y_col = 'labels',
    target_size = IMAGE_SIZE,
    class_mode = 'categorical',
)

In [None]:
# Fill a 2 x 5 grid: each panel shows the first image of a freshly drawn batch.
fig, axis = plt.subplots(2, 5, figsize = (14, 5))
for panel in axis.ravel():
    # Pull exactly one batch from the generator per panel.
    image_batch, label_batch = next(random_gen)
    panel.imshow(image_batch[0])
    panel.xaxis.set_visible(False)
    panel.yaxis.set_visible(False)
plt.tight_layout()

### Load pretrained InceptionV3 model

In [None]:
# Load InceptionV3 with ImageNet weights and without its classification head.
# The input shape is derived from IMAGE_SIZE so the network always matches the
# target_size used by the image generators instead of repeating the literal 224.
base_model = InceptionV3(include_top = False, weights = 'imagenet', input_shape = (*IMAGE_SIZE, 3))

# Freeze every layer of the pretrained network.
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# Take the output tensor of the base network's final layer and report its shape.
final_base_layer = base_model.layers[-1]
last_layer = final_base_layer.output
print(f'Last Layer output shape: {last_layer.shape}')

In [None]:
# Classification head placed on top of the base network's output; the layers
# are applied in the order listed.
head_layers = [
    layers.GlobalAveragePooling2D(),
    layers.Dense(1024, activation = 'relu'),
    layers.Dropout(0.2),
    layers.Dense(512, activation = 'relu'),
    layers.Dense(2, activation = 'softmax'),  # two output units with softmax
]
x = last_layer
for head_layer in head_layers:
    x = head_layer(x)

# Full model: base network input through to the new head's output.
model = tf.keras.Model(inputs = base_model.input, outputs = x)
model.summary()

### Train the model

In [None]:
# Compile the model.
# - `learning_rate` is used instead of the deprecated `lr` alias, which newer
#   Keras releases may warn about, ignore, or reject.
# - `categorical_crossentropy` matches the one-hot labels produced by
#   class_mode = 'categorical' and the 2-unit softmax output; for two softmax
#   outputs it yields the same loss value as binary cross-entropy.
model.compile(optimizer = tf.keras.optimizers.RMSprop(learning_rate = 0.0001),
              loss = 'categorical_crossentropy', metrics = ['acc'])

# Train for 8 epochs, evaluating on the validation generator after each epoch.
history = model.fit(train_gen, validation_data = val_gen, epochs = 8, verbose = 1)

### Fine-tune the model

In [None]:
# Index into base_model.layers where fine-tuning starts: layers before it stay
# frozen, this layer and every later one become trainable.
FINE_TUNE_AT = 197

for layer_index, layer in enumerate(base_model.layers):
    layer.trainable = layer_index >= FINE_TUNE_AT

# Recompile after changing the trainable flags.
# - `learning_rate` is used instead of the deprecated `lr` alias, which newer
#   Keras releases may warn about, ignore, or reject.
# - `categorical_crossentropy` matches the one-hot labels and the 2-unit softmax
#   output; for two softmax outputs it equals binary cross-entropy in value.
model.compile(optimizer = tf.keras.optimizers.RMSprop(learning_rate = 0.0001),
              loss = 'categorical_crossentropy', metrics = ['acc'])

# Train for 6 more epochs.
# NOTE: this overwrites the `history` object returned by the first training run.
history = model.fit(train_gen, validation_data = val_gen, epochs = 6, verbose = 1)

### Create test generator

In [None]:
# Folder of the extracted test images. It gets its own variable so that `path`
# keeps pointing at the training images: re-running the training/validation
# generator cells after this one must not silently read from the test folder.
test_dir = DATA_PATH + '/test1'

# One row per test file; there are no labels for this split.
df_test = pd.DataFrame({'file_names': os.listdir(test_dir)})

# Test images are only rescaled, exactly like the validation images.
test_aug = ImageDataGenerator(rescale = 1 / 255.0)

# shuffle = False keeps the batches in the same order as the rows of df_test,
# so predictions can be assigned back to the dataframe by position.
test_gen = test_aug.flow_from_dataframe(df_test, test_dir, x_col = 'file_names', y_col = None, class_mode = None,
                                        target_size = IMAGE_SIZE, batch_size = BATCH_SIZE, shuffle = False)

### Make predictions on test images

In [None]:
# Number of batches needed to cover every test image. np.ceil returns a float,
# so convert it explicitly: `steps` is meant to be an integer batch count.
test_steps = int(np.ceil(df_test.shape[0] / BATCH_SIZE))
predict = model.predict(test_gen, steps = test_steps)

# Keep the index of the highest-scoring output unit for each image.
df_test['labels'] = np.argmax(predict, axis = -1)

In [None]:
# Invert the generator's class_indices (class name -> index) into index -> class name.
label_map = dict((v, k) for k, v in train_gen.class_indices.items())

# Translate each predicted index straight to its numeric target (dog -> 1, cat -> 0).
# A single `map` replaces the previous int -> str -> int `replace` round trip,
# which relied on pandas implicitly downcasting an object column back to integers
# (deprecated behaviour in recent pandas versions).
target_by_name = {'dog': 1, 'cat': 0}
index_to_target = {index: target_by_name[name] for index, name in label_map.items()}
df_test['labels'] = df_test['labels'].map(index_to_target)

In [None]:
# Show the first five test rows together with their predicted labels.
df_test.head(n = 5)

In [None]:
# Build the submission table without mutating a copy in place.
# - `id` is the file name up to its first '.'.
# - The prediction column is named `label` and placed after `id`.
# NOTE(review): the competition's sample submission is believed to use the
# header `id,label` — confirm against sampleSubmission.csv.
submission_df = pd.DataFrame({
    'id': df_test['file_names'].str.split('.').str[0],
    'label': df_test['labels'],
})
submission_df.head()

In [None]:
# Write the submission table to the working directory, leaving out the dataframe index.
SUBMISSION_FILE = 'submission.csv'
submission_df.to_csv(SUBMISSION_FILE, index = False)