In [None]:
import os
import cv2
import zipfile

import numpy as np
import pandas as pd
import tensorflow as tf

from tqdm import tqdm
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# --- Train/validation split ---
TEST_SIZE = 0.2       # passed to train_test_split as test_size
RANDOM_STATE = 42     # passed to train_test_split as random_state

# --- Training ---
BATCH_SIZE = 64       # batch size handed to the data generators
EPOCHS = 20           # number of epochs requested from the training call

# --- Data locations ---
PATH = '/kaggle/input/dogs-vs-cats-redux-kernels-edition/'  # folder holding train.zip / test.zip
TRAIN_FOLDER = './train/'   # folder the training images are read from
TEST_FOLDER = './test/'     # folder the test images are read from

# --- Images ---
IMG_SIZE = 224        # images are resized to IMG_SIZE x IMG_SIZE

In [None]:
# Locations of the two zipped image sets inside the input folder.
train_data_path, test_data_path = (
    os.path.join(PATH, archive_name)
    for archive_name in ("train.zip", "test.zip")
)

In [None]:
# Unpack the training archive into the current working directory.
with zipfile.ZipFile(train_data_path, mode="r") as train_archive:
    train_archive.extractall()

In [None]:
# Unpack the test archive into the current working directory.
with zipfile.ZipFile(test_data_path, mode="r") as test_archive:
    test_archive.extractall()

In [None]:
def one_hot_encoder(image_path):
    """Derive the binary class label from an image file name.

    Args:
        image_path: File name (not a full path) of the image.

    Returns:
        1 when the name starts with ``'dog'``, otherwise 0.
    """
    return int(image_path.startswith('dog'))

def imageprocess(image):
    """Resize an image to the square input size used by the network.

    Args:
        image: Image array, e.g. as returned by ``cv2.imread``.

    Returns:
        The image resized to ``(IMG_SIZE, IMG_SIZE)`` using bicubic
        interpolation.

    Raises:
        ValueError: If ``image`` is ``None`` (``cv2.imread`` returns ``None``
            when a file cannot be read).
    """
    if image is None:
        raise ValueError("imageprocess() received None: the image could not be read")
    # The third positional parameter of cv2.resize is `dst`, so the
    # interpolation flag must be passed by keyword to actually take effect.
    return cv2.resize(image, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_CUBIC)

def procces_data(data_folder):
    """Load every image in a folder and derive its label from the file name.

    Args:
        data_folder: Directory containing the image files.

    Returns:
        A tuple ``(X, y)`` of two equally long lists: the resized images and
        their labels as produced by ``one_hot_encoder``. Files that OpenCV
        cannot read are skipped and reported once at the end.
    """
    X, y = [], []
    unreadable = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # sample order (and therefore the seeded train/test split) reproducible.
    for image_name in tqdm(sorted(os.listdir(data_folder))):
        img = cv2.imread(os.path.join(data_folder, image_name))
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            unreadable.append(image_name)
            continue
        X.append(imageprocess(img))
        y.append(one_hot_encoder(image_name))
    if unreadable:
        print("Skipped {} unreadable file(s) in {}".format(len(unreadable), data_folder))
    return X, y

def show_images(imgs, isTrain=False, model=None):
    """Plot up to 25 images on a 5x5 grid, optionally titled with predictions.

    Args:
        imgs: Sequence of BGR images as returned by ``cv2.imread``; only the
            first 25 are shown.
        isTrain: When True, every shown image is resized, passed through
            ``model`` and titled with the predicted class ('dog' if the
            prediction is above 0.5, else 'cat').
        model: Object exposing ``predict``; required when ``isTrain`` is True.

    Raises:
        ValueError: If ``isTrain`` is True but no ``model`` is given.
    """
    if isTrain and model is None:
        raise ValueError("show_images(isTrain=True) requires a model")

    shown = imgs[:25]
    f, ax = plt.subplots(nrows = 5, ncols = 5, figsize = (15,15))
    # Hide all axes up front so unused cells stay blank with fewer than 25 images.
    for axis in ax.ravel():
        axis.axis('off')

    preds = None
    if isTrain and len(shown) > 0:
        # Predict the whole grid in one call instead of once per image.
        batch = np.stack([imageprocess(img) for img in shown]).astype('float32')
        preds = model.predict(batch)

    for i, img in enumerate(shown):
        axis = ax[i//5, i%5]
        # OpenCV stores channels as BGR while matplotlib expects RGB.
        axis.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        if preds is not None:
            str_label = 'dog' if preds[i][0] > 0.5 else 'cat'
            axis.set_title("Label: {}".format(str_label))
    plt.show()

In [None]:
# Load the labelled images, then hold out a TEST_SIZE fraction of them.
X, y = procces_data(TRAIN_FOLDER)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

## Visualizing Training Set

In [None]:
# Preview the loaded training images without running any predictions.
show_images(X, isTrain=False, model=None)

## Model

In [None]:
# ResNet50 convolutional base with ImageNet weights; global max pooling
# reduces the final feature map to one feature vector per image.
resnet50 = tf.keras.applications.ResNet50(
    include_top=False,
    weights="imagenet",
    pooling="max",
    input_shape=(IMG_SIZE, IMG_SIZE, 3)
)

# The ImageNet ResNet50 weights expect BGR images with the per-channel
# ImageNet mean subtracted (no scaling). Images in this notebook are read with
# cv2.imread and are therefore already BGR, so only the mean-centering is
# needed. It lives inside the model so training, validation and inference all
# receive exactly the same preprocessing.
IMAGENET_BGR_MEAN = np.array([103.939, 116.779, 123.68], dtype='float32')

model_resnet50 = tf.keras.models.Sequential()
model_resnet50.add(tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3)))
model_resnet50.add(tf.keras.layers.Lambda(
    lambda batch: tf.cast(batch, tf.float32) - IMAGENET_BGR_MEAN,
    name='imagenet_mean_centering',
))
model_resnet50.add(resnet50)
# Single sigmoid unit: output is the predicted probability of class 1 ('dog').
model_resnet50.add(tf.keras.layers.Dense(1, activation='sigmoid'))

opt = tf.keras.optimizers.SGD()
loss = tf.keras.losses.BinaryCrossentropy()
metrics = ['accuracy']

model_resnet50.compile(optimizer=opt, loss=loss, metrics=metrics)

model_resnet50.summary()

## Data generator

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves in-memory images and labels in batches.

    Args:
        images_paths: Indexable collection of image arrays. Despite the name,
            the elements must be arrays (``.astype`` is called on each one),
            not file paths.
        labels: Indexable collection of labels aligned with ``images_paths``.
        batch_size: Maximum number of samples per batch.
        shuffle: When True, the sample order is reshuffled at construction
            and after every epoch. Defaults to False (original order).
    """

    def __init__(
            self,
            images_paths,
            labels,
            batch_size=64,
            shuffle=False,
        ):
        # Initialise the Sequence base class (newer Keras versions warn when
        # a subclass skips this).
        super().__init__()
        self.labels = labels
        self.images_paths = images_paths
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch, including a final partial one."""
        # ceil rather than floor: flooring silently dropped the trailing
        # samples whenever the dataset size is not a multiple of batch_size.
        return int(np.ceil(len(self.images_paths) / self.batch_size))

    def on_epoch_end(self):
        """Rebuild the sample order, reshuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.images_paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(images, labels)`` pair of arrays."""
        indexes = self.indexes[index * self.batch_size: (index + 1) * self.batch_size]
        labels = np.array([self.labels[k] for k in indexes])
        images = np.array([self.images_paths[k].astype('float32') for k in indexes])
        return images, labels


In [None]:
# Wrap both splits in batch generators that share BATCH_SIZE.
train_data = DataGenerator(images_paths=X_train, labels=y_train, batch_size=BATCH_SIZE)
val_data = DataGenerator(images_paths=X_test, labels=y_test, batch_size=BATCH_SIZE)

## Training and visualizing loss and accuracy curves

In [None]:
# Callback that keeps only the weights with the lowest validation loss.
# It is defined here but not passed to fit() below.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    './cat_dogs_resnet50_checkpoint.hdf5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
    save_weights_only=True
)

# Model.fit accepts Sequence objects directly; fit_generator is deprecated and
# removed in newer Keras releases. The number of steps is taken from the
# generators' __len__, so steps_per_epoch / validation_steps are not needed.
history = model_resnet50.fit(
    train_data,
    validation_data=val_data,
    epochs=EPOCHS,
    #callbacks=[checkpoint],
)
model_resnet50.save_weights('./cat_dogs_resnet50.hdf5')

In [None]:
def plot_accuracy_and_loss(train_model):
    """Plot training/validation accuracy and loss curves side by side.

    Args:
        train_model: Either a trained model whose ``history`` attribute is the
            object returned by its fit call, or that returned object itself
            (whose ``history`` attribute is the metrics dict). The metrics
            dict must contain 'accuracy', 'val_accuracy', 'loss' and
            'val_loss'.
    """
    history_attr = train_model.history
    # A model exposes the History object here; a History exposes the dict.
    hist = history_attr if isinstance(history_attr, dict) else history_attr.history
    epochs = range(len(hist['accuracy']))
    f, ax = plt.subplots(1,2, figsize=(14,6))
    for axis, metric in zip(ax, ('accuracy', 'loss')):
        axis.plot(epochs, hist[metric], 'g', label='Training ' + metric)
        axis.plot(epochs, hist['val_' + metric], 'r', label='Validation ' + metric)
        axis.set_title('Training and validation ' + metric)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(metric.capitalize())
        axis.legend()
    plt.show()
plot_accuracy_and_loss(model_resnet50)

In [None]:
# evaluate() returns [loss, accuracy] for the metrics the model was compiled with.
score = model_resnet50.evaluate(val_data, verbose=0)
val_accuracy = score[1]
print('Validation accuracy:', val_accuracy)

## Visualizing predictions

In [None]:
# Read the first 25 test files and show each one with its predicted class.
preview_files = os.listdir(TEST_FOLDER)[:25]
preview_images = [
    cv2.imread(os.path.join(TEST_FOLDER, file_name))
    for file_name in preview_files
]
show_images(preview_images, isTrain=True, model=model_resnet50)

In [None]:
# Predict the test set in batches of BATCH_SIZE; calling predict() once per
# image is dramatically slower for the same results.
pred_list = []
img_list = []
test_image_names = os.listdir(TEST_FOLDER)
for start in tqdm(range(0, len(test_image_names), BATCH_SIZE)):
    batch_names = test_image_names[start:start + BATCH_SIZE]
    batch = np.stack([
        imageprocess(cv2.imread(os.path.join(TEST_FOLDER, image_name))).astype('float32')
        for image_name in batch_names
    ])
    batch_pred = model_resnet50.predict(batch, verbose=0)
    # Column 0 is the single sigmoid output of the model.
    pred_list.extend(batch_pred[:, 0])
    # splitext drops the extension regardless of its length (the previous
    # [:-4] slice assumed exactly three characters plus the dot).
    img_list.extend(os.path.splitext(image_name)[0] for image_name in batch_names)

In [None]:
# Build the submission table (numeric ids, ascending) and write it to disk.
submission = (
    pd.DataFrame({'id': img_list, 'label': pred_list})
    .astype({'id': int})
    .sort_values(by=['id'])
)
submission.to_csv("submission.csv", index=False)