In [None]:
%matplotlib widget
import os
import shutil
import time
import urllib
import zipfile
from matplotlib import pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, utils

In [None]:
def download():
    """Download the Kaggle dogs-vs-cats competition archive into kaggle/dogs-vs-cats.

    The destination is relative to the current working directory. The original
    note described the result as notebooks/kaggle/dogs-vs-cats/dogs-vs-cats.zip,
    so the notebook is presumably run from a `notebooks` folder -- confirm.

    The download is done with an IPython `!` shell escape, so this function
    only works inside IPython/Jupyter and needs the `kaggle` command on PATH.
    """
    # One-time setup required before the command below can succeed:
    # - Visit https://www.kaggle.com/
    # - login,
    # - Go to your account
    # - Click "Create New API Token"
    # - Put the file in ~/.kaggle
    # - chmod 600 ~/.kaggle/kaggle.json
    # - Visit https://www.kaggle.com/c/dogs-vs-cats/rules
    # - Click "I understand and accept"
    !kaggle competitions download -c dogs-vs-cats -p kaggle/dogs-vs-cats

def extract(root="kaggle/dogs-vs-cats"):
    """Unpack a train/validation/test subset of the Kaggle archive under `root`.

    Opens `<root>/dogs-vs-cats.zip`, then the nested `train.zip` inside it, and
    copies every entry named `train/<label>.<index>.jpg` (label is `cat` or
    `dog`) to `<root>/<split>/<label>/<index>.jpg`. The split is chosen from
    the numeric index:

    - index < 1000          -> train
    - 1000 <= index < 1500  -> validation
    - 1500 <= index < 2500  -> test
    - index >= 2500         -> not extracted

    Entries with an unknown label or a non-numeric index are skipped instead of
    raising, so stray files in the archive cannot abort the extraction or be
    written outside the six prepared directories.

    The archive is walked once in its stored order: doing the reverse (looking
    up each wanted file one at a time) is notoriously slow in Python.

    Args:
        root: Directory that holds `dogs-vs-cats.zip` and receives the split
            sub-directories. Defaults to the path used by the notebook.

    Raises:
        FileNotFoundError: if `<root>/dogs-vs-cats.zip` does not exist.
        KeyError: if the archive has no `train.zip` member.
    """
    labels = ("cat", "dog")
    # (exclusive upper bound on the image index, split name), in ascending order.
    splits = ((1000, "train"), (1500, "validation"), (2500, "test"))
    prefix = "train/"

    for label in labels:
        for _, split in splits:
            os.makedirs(os.path.join(root, split, label), exist_ok=True)

    with zipfile.ZipFile(os.path.join(root, "dogs-vs-cats.zip")) as outer:
        with zipfile.ZipFile(outer.open("train.zip")) as inner:
            for info in inner.infolist():
                name = info.filename
                if not name.startswith(prefix) or not name.endswith(".jpg"):
                    continue
                # "train/cat.12.jpg" -> ["cat", "12", "jpg"]
                parts = name[len(prefix):].split(".")
                label = parts[0]
                if label not in labels:
                    continue
                try:
                    index = int(parts[1])
                except ValueError:
                    continue
                # First split whose upper bound exceeds the index, if any.
                split = next((s for bound, s in splits if index < bound), None)
                if split is None:
                    continue
                dest = os.path.join(root, split, label, parts[1] + ".jpg")
                # Stream the member to disk instead of loading it fully in memory.
                with inner.open(info) as src, open(dest, "wb") as dst:
                    shutil.copyfileobj(src, dst)

# Fetch and unpack the dataset only once: test/dog/2499.jpg is the
# highest-numbered dog image that extract() places in the test split, so its
# presence is used as the marker that a previous extraction completed.
if not os.path.isfile("kaggle/dogs-vs-cats/test/dog/2499.jpg"):
    download()
    extract()

In [None]:
# Side length (pixels) that every image is resized to; also used for the model
# input shape and in the checkpoint file names below.
img_size = 256 # 180
# NOTE(review): the models below hard-code 3 input channels, so this presumably
# has to stay "rgb" -- confirm before changing.
color_mode = "rgb"
# One dataset per directory written by extract(), each yielding batches of 32
# images resized to (img_size, img_size).
train_dataset = utils.image_dataset_from_directory("kaggle/dogs-vs-cats/train", image_size=(img_size, img_size), batch_size=32, color_mode=color_mode)
validation_dataset = utils.image_dataset_from_directory("kaggle/dogs-vs-cats/validation", image_size=(img_size, img_size), batch_size=32, color_mode=color_mode)
test_dataset = utils.image_dataset_from_directory("kaggle/dogs-vs-cats/test", image_size=(img_size, img_size), batch_size=32, color_mode=color_mode)

def test_model(p):
    """Load the saved model at path `p` and print its accuracy on `test_dataset`.

    The result of `evaluate` is unpacked as (loss, accuracy); only the accuracy
    is reported, formatted with three decimals.
    """
    model = keras.models.load_model(p)
    _, test_acc = model.evaluate(test_dataset, verbose=0)
    print(f"Test accuracy: {test_acc:.3f}")

def draw_hist(h):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        h: Object whose `history` attribute maps "accuracy", "val_accuracy",
            "loss" and "val_loss" to per-epoch sequences (e.g. the value
            returned by `fit_first` / `fit_aug`).

    Opens a new 10x5 figure with two panels (accuracy on the left, loss on the
    right); training values are drawn as dots, validation values as a line.
    """
    history = h.history
    # Each panel: (title, training label, training values,
    #              validation label, validation values).
    panels = (
        ("Training and validation accuracy",
         "Training accuracy", history["accuracy"],
         "Validation accuracy", history["val_accuracy"]),
        ("Training and validation loss",
         "Training loss", history["loss"],
         "Validation loss", history["val_loss"]),
    )
    # Epochs are numbered from 1, one per recorded accuracy value.
    epochs = range(1, len(panels[0][2]) + 1)

    plt.figure(figsize=(10, 5))
    for position, (title, train_label, train_values, val_label, val_values) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epochs, train_values, "bo", label=train_label)
        plt.plot(epochs, val_values, "b", label=val_label)
        plt.title(title)
        plt.legend()
    plt.show()

def _build_convnet(augmentation=None):
    """Build and compile the small convnet shared by `fit_first` and `fit_aug`.

    Architecture: optional augmentation -> Rescaling(1/255) -> four
    Conv2D(3x3, relu) + MaxPooling2D(2) stages with 32, 64, 128 and 256 filters
    -> Conv2D(256, 3x3, relu) -> Flatten -> Dense(1, sigmoid).

    Args:
        augmentation: Optional callable (e.g. a `keras.Sequential` of random
            preprocessing layers) applied to the raw input before rescaling.

    Returns:
        A `keras.Model` compiled with binary cross-entropy loss, the
        "rmsprop" optimizer and the "accuracy" metric.
    """
    inputs = keras.Input(shape=(img_size, img_size, 3))
    x = inputs if augmentation is None else augmentation(inputs)
    x = layers.Rescaling(1./255)(x)
    for filters in (32, 64, 128, 256):
        x = layers.Conv2D(filters=filters, kernel_size=3, activation="relu")(x)
        x = layers.MaxPooling2D(pool_size=2)(x)
    # Last convolution stage has no pooling after it.
    x = layers.Conv2D(filters=256, kernel_size=3, activation="relu")(x)
    x = layers.Flatten()(x)
    outputs = layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])
    return model

def _fit_with_checkpoint(model, p, epochs):
    """Fit `model` on `train_dataset`, checkpointing the best epoch to `p`.

    A `ModelCheckpoint` callback monitoring "val_loss" with
    `save_best_only=True` writes to `p`, so the file is only overwritten when
    the monitored validation loss improves. Returns whatever `model.fit`
    returns.
    """
    callbacks = [
        keras.callbacks.ModelCheckpoint(
            filepath=p,
            save_best_only=True,
            monitor="val_loss")
    ]
    return model.fit(train_dataset, epochs=epochs, validation_data=validation_dataset, callbacks=callbacks)

def fit_first(p, epochs=10):
    """Train the baseline convnet (no augmentation) and checkpoint it to `p`.

    Args:
        p: File path handed to `ModelCheckpoint` for the best model.
        epochs: Number of training epochs (default 10, the value previously
            hard-coded here).

    Returns:
        The value returned by `Model.fit` (consumed by `draw_hist`).
    """
    return _fit_with_checkpoint(_build_convnet(), p, epochs)

# Random augmentation applied to the raw images: horizontal flips, rotations
# with factor 0.1 and zooms with factor 0.2.
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.2),
    ])

def fit_aug(p, epochs=40):
    """Train the convnet with `data_augmentation` in front and checkpoint it to `p`.

    Args:
        p: File path handed to `ModelCheckpoint` for the best model.
        epochs: Number of training epochs. Defaults to 40, the value previously
            hard-coded here; an earlier comment in this function mentioned 100.

    Returns:
        The value returned by `Model.fit` (consumed by `draw_hist`).
    """
    return _fit_with_checkpoint(_build_convnet(data_augmentation), p, epochs)

def draw_augmented_images():
    """Show nine random augmentations of one training image in a 3x3 grid.

    Takes a single batch from `train_dataset`, runs `data_augmentation` on it
    nine times and draws the first image of each augmented batch, cast to
    uint8, with the axes hidden.
    """
    plt.figure(figsize=(10, 10))
    for batch, _labels in train_dataset.take(1):
        for cell in range(1, 10):
            # A fresh augmentation pass per grid cell so each cell differs.
            augmented = data_augmentation(batch)
            plt.subplot(3, 3, cell)
            first_image = augmented[0].numpy().astype("uint8")
            plt.imshow(first_image)
            plt.axis("off")

In [None]:
# Train the baseline convnet; the best checkpoint (lowest val_loss) is written
# to out/chap8_<img_size>.keras.
hist = fit_first(f"out/chap8_{img_size}.keras")

In [None]:
# Accuracy/loss curves of the baseline run.
draw_hist(hist)

In [None]:
# Evaluate the checkpointed baseline model on the test split.
test_model(f"out/chap8_{img_size}.keras")

In [None]:
# Preview what the augmentation pipeline does to one training image.
draw_augmented_images()

In [None]:
# Train the augmented convnet; the best checkpoint (lowest val_loss) is written
# to out/chap8_<img_size>_aug.keras.
hist2 = fit_aug(f"out/chap8_{img_size}_aug.keras")

In [None]:
# Accuracy/loss curves of the augmented run.
draw_hist(hist2)

In [None]:
# Evaluate the checkpointed augmented model on the test split.
test_model(f"out/chap8_{img_size}_aug.keras")

In [None]:
# Instantiate VGG16 with ImageNet weights and without its classifier top
# (include_top=False), sized for (img_size, img_size, 3) inputs. Nothing in
# this notebook uses conv_base yet -- see the TODO below.
conv_base = keras.applications.vgg16.VGG16(
    weights="imagenet",
    include_top=False,
    input_shape=(img_size, img_size, 3))
#print(conv_base.summary())
# TODO(maruel): Finish chap 8 for feature extraction if desired.