In [None]:
import os
from pathlib import Path
from zipfile import ZipFile
import shutil

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

In [None]:
# Working directory that receives the extracted competition archives.
data_dir = Path('/kaggle/working/data')
# exist_ok keeps the cell re-runnable (a bare mkdir() raises FileExistsError
# the second time); parents creates any missing intermediate directory.
data_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Unpack the training archive into the working data directory.
train_zip_path = '/kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip'
with ZipFile(train_zip_path) as archive:
    archive.extractall(path=data_dir)

train_dir = data_dir / 'train'
# Cell output: number of entries directly inside `train_dir`.
len([entry for entry in train_dir.iterdir()])

In [None]:
# Unpack the test archive next to the training data.
test_zip_path = '/kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip'
with ZipFile(test_zip_path) as archive:
    archive.extractall(path=data_dir)

test_dir = data_dir / 'test'
# Cell output: number of entries directly inside `test_dir`.
len([entry for entry in test_dir.iterdir()])

In [None]:
# One sub-folder per class; `image_dataset_from_directory` is later pointed at
# `train_dir` and reads these sub-folders.
cat_dir = train_dir / 'cat'
dog_dir = train_dir / 'dog'

# exist_ok=True so that re-running the cell does not raise FileExistsError.
cat_dir.mkdir(exist_ok=True)
dog_dir.mkdir(exist_ok=True)

In [None]:
# Sort the flat training images into the per-class sub-folders.
# - The glob is materialised into a list first so that moving files cannot
#   interfere with the directory iteration.
# - The class is decided from the file name only; testing the full path would
#   send every image to `cat_dir` if any parent directory contained "cat".
for image_path in list(train_dir.glob('*.jpg')):
    target_dir = cat_dir if 'cat' in image_path.name else dog_dir
    shutil.move(str(image_path), str(target_dir))

In [None]:
# Print the number of files per class folder (cats first, then dogs).
for class_dir in (cat_dir, dog_dir):
    print(len([entry for entry in class_dir.iterdir()]))

In [None]:
# Number of images per batch for the training, validation and test datasets.
BATCH_SIZE = 32
# Target size every image is resized to; also the model's input size.
IMAGE_SIZE = (160, 160)

In [None]:
# Both subsets are read from the same directory with the same split fraction
# and seed; only `subset` differs between the two calls.
split_options = dict(
    validation_split=0.2,
    seed=33,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)
load_images = tf.keras.preprocessing.image_dataset_from_directory

train_dataset = load_images(str(train_dir), subset="training", **split_options)

validation_dataset = load_images(str(train_dir), subset="validation", **split_options)

In [None]:
class_names = train_dataset.class_names

# Show the first nine images of one training batch, titled with their class.
plt.figure(figsize=(10, 10))
for images, labels in train_dataset.take(1):
    for position, (image, label) in enumerate(zip(images[:9], labels[:9]), start=1):
        plt.subplot(3, 3, position)
        plt.imshow(image.numpy().astype("uint8"))
        plt.title(class_names[label])
        plt.axis("off")

In [None]:
# Cache the loaded batches and let tf.data pick the prefetch buffer size so
# that input preparation can overlap with model execution.
autotune = tf.data.AUTOTUNE
train_dataset = train_dataset.cache()
train_dataset = train_dataset.prefetch(buffer_size=autotune)
validation_dataset = validation_dataset.cache()
validation_dataset = validation_dataset.prefetch(buffer_size=autotune)

In [None]:
# Random augmentation pipeline: horizontal flip, small rotation, small zoom.
_augment_layers = tf.keras.layers.experimental.preprocessing
data_augmentation = tf.keras.Sequential([
    _augment_layers.RandomFlip("horizontal"),
    _augment_layers.RandomRotation(0.1),
    _augment_layers.RandomZoom(0.1),
])

In [None]:
# Apply the augmentation pipeline nine times to the same training image to
# visualise the variety it produces.
plt.figure(figsize=(10, 10))
for image_batch, _ in train_dataset.take(1):
    first_image = image_batch[0]
    single_image_batch = tf.expand_dims(first_image, 0)
    for position in range(1, 10):
        plt.subplot(3, 3, position)
        augmented_image = data_augmentation(single_image_batch)
        plt.imshow(augmented_image[0].numpy().astype('uint8'))
        plt.axis('off')

## Transfer Learning

In [None]:
# MobileNetV2 with ImageNet weights and without its classification top,
# to be used as a feature extractor.
model_input_shape = IMAGE_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=model_input_shape,
)

In [None]:
# Sanity check: run one training batch through the base model and print the
# shape of the resulting feature maps.
first_batch = next(iter(train_dataset))
image_batch, label_batch = first_batch
feature_batch = base_model(image_batch)
print(feature_batch.shape)

In [None]:
# Freeze the whole base model so its weights are not updated in the first
# training phase (only the new head is trained).
base_model.trainable = False

In [None]:
# Print the layer-by-layer architecture of the (now frozen) base model.
base_model.summary()

In [None]:
# Pooling layer that collapses the spatial dimensions of the feature maps;
# it is tried on `feature_batch` here and reused when the model is assembled.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

In [None]:
# Single-unit head with no activation: it outputs a raw logit, which matches
# the `from_logits=True` loss used when the model is compiled.
prediction_layer = tf.keras.layers.Dense(1)
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)

In [None]:
# MobileNetV2's own input preprocessing; per the Keras documentation it maps
# pixel values to [-1, 1] — so the model expects un-normalised pixel inputs
# (presumably 0-255; confirm against the Keras docs).
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

In [None]:
# Assemble the classifier:
# input -> augmentation -> MobileNetV2 preprocessing -> frozen base
#       -> global average pooling -> dropout -> single-logit head.
inputs = tf.keras.Input(shape=IMAGE_SIZE + (3,))
x = preprocess_input(data_augmentation(inputs))
# training=False keeps the base model in inference mode even while the
# surrounding model is being trained.
x = global_average_layer(base_model(x, training=False))
head_dropout = tf.keras.layers.Dropout(0.2)
outputs = prediction_layer(head_dropout(x))
model = tf.keras.Model(inputs, outputs)

In [None]:
# Print the architecture of the assembled model.
model.summary()

In [None]:
# Learning rate for the feature-extraction phase; the fine-tuning phase later
# uses a tenth of it.
base_learning_rate = 0.0001
# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases no longer accept. The loss uses from_logits=True because the head
# is a Dense(1) layer without an activation.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
# Baseline: evaluate on the validation set before any training of the head.
loss0, accuracy0 = model.evaluate(validation_dataset)

In [None]:
# Report the pre-training baseline with two decimals.
print(f"initial loss: {loss0:.2f}")
print(f"initial accuracy: {accuracy0:.2f}")

In [None]:
# Number of epochs for the feature-extraction phase (base model frozen).
EPOCHS = 20
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=EPOCHS,
)

In [None]:
# Learning curves of the feature-extraction phase.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# One tick per trained epoch, derived from EPOCHS rather than a literal 20 so
# the axis stays correct when the epoch count is changed.
epoch_ticks = list(range(EPOCHS))

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
# Keep the automatically chosen lower bound but pin the upper bound at 1.
plt.ylim([min(plt.ylim()),1])
plt.xticks(epoch_ticks)
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.ylim([0,1.0])
plt.title('Training and Validation Loss')
plt.xticks(epoch_ticks)
plt.xlabel('Epoch')
plt.show()

## Fine Tuning

In [None]:
# Unfreeze the base model; the lower layers are frozen again individually in
# a later cell so that only the top part is fine-tuned.
base_model.trainable = True

In [None]:
# Show how many layers the base model has, to help choose the fine-tuning cut-off.
print("Number of layers in the base model: ", len(base_model.layers))

In [None]:
# Index of the first base-model layer that stays trainable.
fine_tune_from = 100

# Freeze every layer below index `fine_tune_from`; layers from that index
# onwards keep the trainable state set on the base model.
frozen_layers = base_model.layers[:fine_tune_from]
for frozen_layer in frozen_layers:
    frozen_layer.trainable = False

In [None]:
# Re-compile after changing the `trainable` flags, using a learning rate ten
# times smaller than in the first phase.
# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases no longer accept.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate/10),
              metrics=['accuracy'])

In [None]:
# Print the summary again to inspect the model after unfreezing part of the base.
model.summary()

In [None]:
# Number of additional epochs for the fine-tuning phase.
fine_tune_epochs = 10
total_epochs = EPOCHS + fine_tune_epochs

# `history.epoch` holds the zero-based indices of the epochs already run, so
# its length is the index of the next epoch. Using `history.epoch[-1]` would
# resume one epoch too early and train fine_tune_epochs + 1 epochs.
history_fine = model.fit(train_dataset,
                         epochs=total_epochs,
                         initial_epoch=len(history.epoch),
                         validation_data=validation_dataset)

In [None]:
# Concatenate the curves of both training phases.
# New lists are built from the two History objects instead of using `+=`,
# which would extend the lists stored inside `history.history` in place and
# append the fine-tuning values again each time the cell is re-run.
acc = history.history['accuracy'] + history_fine.history['accuracy']
val_acc = history.history['val_accuracy'] + history_fine.history['val_accuracy']

loss = history.history['loss'] + history_fine.history['loss']
val_loss = history.history['val_loss'] + history_fine.history['val_loss']

In [None]:
# Combined learning curves for both phases; a vertical line at EPOCHS-1 marks
# where fine-tuning starts.
plt.figure(figsize=(8, 8))
panels = (
    # (subplot index, training curve, validation curve,
    #  training label, validation label, y-limits, legend location, title)
    (1, acc, val_acc, 'Training Accuracy', 'Validation Accuracy',
     [0.8, 1], 'lower right', 'Training and Validation Accuracy'),
    (2, loss, val_loss, 'Training Loss', 'Validation Loss',
     [0, 1.0], 'upper right', 'Training and Validation Loss'),
)
for (position, train_curve, val_curve, train_label, val_label,
     y_limits, legend_loc, panel_title) in panels:
    plt.subplot(2, 1, position)
    plt.plot(train_curve, label=train_label)
    plt.plot(val_curve, label=val_label)
    plt.ylim(y_limits)
    # The marker spans the full (already fixed) y-range of the panel.
    plt.plot([EPOCHS-1, EPOCHS-1], plt.ylim(), label='Start Fine Tuning')
    plt.legend(loc=legend_loc)
    plt.title(panel_title)
plt.xlabel('epoch')
plt.show()

In [None]:
# Dataset of test image paths. `list_files` shuffles by default; shuffle=False
# gives a deterministic order so the previews and the prediction order are
# reproducible between runs.
test_dataset = tf.data.Dataset.list_files(str(test_dir / '*.jpg'), shuffle=False)

In [None]:
# Inspect the file-path dataset defined in the previous cell instead of
# globbing the test directory a second time just to display a repr.
test_dataset

In [None]:
def parse_image(filename):
    """Load one test image and derive its id from the file name.

    Args:
        filename: scalar string tensor holding the path of a JPEG file.

    Returns:
        A tuple ``(image, label)`` where ``image`` is a float32 tensor resized
        to ``IMAGE_SIZE`` with three channels and ``label`` is the file name up
        to its first ``'.'`` as a string tensor (for ``123.jpg`` this is
        ``"123"``).

    Note:
        ``tf.image.convert_image_dtype`` rescales the pixels to [0, 1]. The
        datasets built with ``image_dataset_from_directory`` are not rescaled
        this way, so code feeding these images to ``model`` has to account for
        the different value range.
    """
    parts = tf.strings.split(filename, os.sep)
    label = tf.strings.split(parts[-1], '.')[0]

    image = tf.io.read_file(filename)
    # channels=3 forces RGB output so that grayscale JPEGs still produce the
    # three-channel shape the model and the batching step require.
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, IMAGE_SIZE)

    return image, label

In [None]:
# Turn the file paths into (image, id) pairs, then group them into batches.
parsed_test_images = test_dataset.map(parse_image)
test_dataset = parsed_test_images.batch(BATCH_SIZE)

In [None]:
# Show the first nine images of the first test batch, titled with their ids.
plt.figure(figsize=(10, 10))
for images, labels in test_dataset.take(1).as_numpy_iterator():
    for position in range(1, 10):
        plt.subplot(3, 3, position)
        plt.imshow(images[position - 1])
        image_id = int(labels[position - 1].decode('UTF-8'))
        plt.title(image_id)
        plt.axis("off")

In [None]:
# Scratch check of list concatenation; the result is not assigned or used by
# any other cell. NOTE(review): looks like a leftover experiment — consider deleting.
[] + [1,2,3] + [5,5]

In [None]:
# Accumulators, filled batch by batch and kept in the same order.
image_ids = []
logits = []
predictions = []

for image_batch, id_batch in test_dataset.as_numpy_iterator():
    # `parse_image` yields pixels in [0, 1] (convert_image_dtype), whereas the
    # model was trained on the un-normalised batches produced by
    # `image_dataset_from_directory` and applies `preprocess_input` itself.
    # Scale back to 0-255 so inference sees the same value range as training.
    batch_logits = model.predict_on_batch(image_batch * 255.0).flatten()
    # The head outputs raw logits; sigmoid converts them to probabilities.
    batch_probabilities = tf.nn.sigmoid(batch_logits).numpy()

    logits.extend(batch_logits.tolist())
    predictions.extend(batch_probabilities.tolist())
    # Ids arrive as bytes (e.g. b'123'); store them as integers.
    image_ids.extend(int(raw_id) for raw_id in id_batch)

In [None]:
# One row per test image: id, predicted probability and the raw logit.
submission_columns = {
    'id': image_ids,
    'label': predictions,
    'logits': logits,
}
submission = pd.DataFrame(submission_columns)

In [None]:
# Summary statistics of the numeric columns, as a sanity check on the predictions.
submission.describe()

In [None]:
# Peek at the first rows of the submission table.
submission.head()

In [None]:
# Store the image ids as integers.
submission['id'] = submission['id'].astype(int)

In [None]:
# Check column dtypes and non-null counts after the id conversion.
submission.info()

In [None]:
# Write only the id and label columns (no index, no logits) to the submission file.
submission.to_csv('submission.csv', columns=['id', 'label'], index=False)