In [2]:
import pathlib
import tensorflow as tf

## Load data

In [3]:
# Download the filtered cats-vs-dogs archive through Keras' get_file helper,
# asking it to extract the contents as well.
_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'

path_to_zip = pathlib.Path(
    tf.keras.utils.get_file('cats_and_dogs.zip', origin=_URL, extract=True)
)

# The dataset root is looked up in the same directory as the downloaded zip.
PATH = path_to_zip.parent / 'cats_and_dogs_filtered'

In [4]:
# Split-level directories inside the dataset root.
train_dir = PATH.joinpath('train')
validation_dir = PATH.joinpath('validation')

In [5]:
# Per-class image folders for each split.
train_cats_dir, train_dogs_dir = (
    train_dir.joinpath(label) for label in ('cats', 'dogs')
)
validation_cats_dir, validation_dogs_dir = (
    validation_dir.joinpath(label) for label in ('cats', 'dogs')
)

In [6]:
# Global settings shared by the cells below
BATCH_SIZE = 128              # images per generator batch
EPOCHS = 15                   # epochs passed to each model.fit() call
IMG_HEIGHT = IMG_WIDTH = 150  # every image is resized to this height/width

## Data preparation

In [7]:
# Training and validation generators apply the same 1/255 rescaling and nothing else.
train_image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255,
)
validation_image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255,
)

In [8]:
# Stream shuffled training batches from the training directory, resized to
# IMG_HEIGHT x IMG_WIDTH, with binary class labels.
train_data_gen = train_image_generator.flow_from_directory(
    directory=train_dir,
    batch_size=BATCH_SIZE,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary',
    shuffle=True,
)

Found 2000 images belonging to 2 classes.


In [9]:
# Validation batches: same resizing and binary labels as the training stream.
val_data_gen = validation_image_generator.flow_from_directory(
    directory=validation_dir,
    batch_size=BATCH_SIZE,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary',
)

Found 1000 images belonging to 2 classes.


## Create the model

In [40]:
# Baseline CNN: three conv + max-pool stages (16 -> 32 -> 64 filters), then a
# dense head whose final layer has a single unit and no activation.
model = tf.keras.Sequential()

model.add(tf.keras.layers.Conv2D(
    filters=16, kernel_size=3, padding='same', activation='relu',
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
))
model.add(tf.keras.layers.MaxPool2D(pool_size=2))

model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(tf.keras.layers.MaxPool2D(pool_size=2))

model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'))
model.add(tf.keras.layers.MaxPool2D(pool_size=2))

model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=512, activation='relu'))
model.add(tf.keras.layers.Dense(units=1))

In [41]:
# Adam optimizer with a binary cross-entropy loss that is told the model
# outputs are logits; accuracy is tracked as the metric.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)

In [59]:
# Train for EPOCHS epochs, validating after each one.
#
# The step counts come from the generators themselves: len() of a Keras
# directory iterator is its number of batches per pass, including the final
# partial batch. This keeps the counts in sync with the files the generators
# actually indexed (instead of re-globbing the disk for '*.jpg') and makes each
# epoch cover every training and validation image rather than dropping the
# remainder left over by floor division.
model.fit(
    train_data_gen,
    steps_per_epoch=len(train_data_gen),
    epochs=EPOCHS,
    validation_data=val_data_gen,
    validation_steps=len(val_data_gen),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7f5edd4d1850>

## Overfitting

### Data augmentation

#### Apply horizontal flip

In [62]:
# Augmentation demo: random horizontal flips on top of the 1/255 rescaling.
image_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True
)

# class_mode='binary' yields one 0/1 label per image, matching the single-unit
# output layer and BinaryCrossentropy loss used by the models in this notebook.
# Without it flow_from_directory falls back to 'categorical' (one-hot labels
# with one column per class), which does not match that model output.
train_data_gen = image_gen.flow_from_directory(
    directory=train_dir,
    target_size=[IMG_HEIGHT, IMG_WIDTH],
    batch_size=BATCH_SIZE,
    shuffle=True,
    class_mode='binary'
)

Found 2000 images belonging to 2 classes.


#### Randomly rotate the image

In [109]:
# Augmentation demo: random rotations (rotation_range=45) on top of the
# 1/255 rescaling.
image_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=45
)

# class_mode='binary' yields one 0/1 label per image, matching the single-unit
# output layer and BinaryCrossentropy loss used by the models in this notebook.
# Without it flow_from_directory falls back to 'categorical' (one-hot labels
# with one column per class), which does not match that model output.
train_data_gen = image_gen.flow_from_directory(
    directory=train_dir,
    target_size=[IMG_HEIGHT, IMG_WIDTH],
    batch_size=BATCH_SIZE,
    shuffle=True,
    class_mode='binary'
)

Found 2000 images belonging to 2 classes.


#### Apply zoom augmentation

In [110]:
# Augmentation demo: random zoom (zoom_range=0.5) on top of the 1/255 rescaling.
image_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.5
)

# class_mode='binary' yields one 0/1 label per image, matching the single-unit
# output layer and BinaryCrossentropy loss used by the models in this notebook.
# Without it flow_from_directory falls back to 'categorical' (one-hot labels
# with one column per class), which does not match that model output.
train_data_gen = image_gen.flow_from_directory(
    directory=train_dir,
    target_size=[IMG_HEIGHT, IMG_WIDTH],
    batch_size=BATCH_SIZE,
    shuffle=True,
    class_mode='binary'
)

Found 2000 images belonging to 2 classes.


#### Put it all together

In [11]:
# Training-time augmentation combining rescaling with rotation, width/height
# shifts, zoom, and both horizontal and vertical flips.
image_gen_train = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=45,
    width_shift_range=.15,
    height_shift_range=.15,
    zoom_range=0.5,
    horizontal_flip=True,
    vertical_flip=True
)

# This generator feeds the later model.fit call, so its labels must match the
# model: class_mode='binary' yields one 0/1 label per image for the single-unit
# output layer trained with BinaryCrossentropy. Without it flow_from_directory
# falls back to 'categorical' (one-hot labels with one column per class), and
# the loss would be computed against labels of the wrong shape.
train_data_gen = image_gen_train.flow_from_directory(
    directory=train_dir,
    target_size=[IMG_HEIGHT, IMG_WIDTH],
    batch_size=BATCH_SIZE,
    shuffle=True,
    class_mode='binary'
)

Found 2000 images belonging to 2 classes.


### Dropout

In [12]:
# Regularized CNN: three 5x5 conv + max-pool stages (16 -> 32 -> 64 filters),
# a 512-unit dense layer followed by 20% dropout, and a single-unit output
# layer with no activation.
model = tf.keras.Sequential()

model.add(tf.keras.layers.Conv2D(
    filters=16, kernel_size=5, padding='same', activation='relu',
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
))
model.add(tf.keras.layers.MaxPool2D(pool_size=2))

model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=5, padding='same', activation='relu'))
model.add(tf.keras.layers.MaxPool2D(pool_size=2))

model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=5, padding='same', activation='relu'))
model.add(tf.keras.layers.MaxPool2D(pool_size=2))

model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=512, activation='relu'))
model.add(tf.keras.layers.Dropout(rate=0.2))
model.add(tf.keras.layers.Dense(units=1))

In [15]:
# Binary cross-entropy on logits with accuracy as the metric; Adam is
# constructed explicitly so its learning rate is visible and easy to tune.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

### Train a new model

In [16]:
# Train the current model for EPOCHS epochs inside an explicit 'GPU:0' device
# scope, validating after each epoch.
#
# The step counts come from the generators themselves: len() of a Keras
# directory iterator is its number of batches per pass, including the final
# partial batch. This keeps the counts in sync with the files the generators
# actually indexed (instead of re-globbing the disk for '*.jpg') and makes each
# epoch cover every training and validation image rather than dropping the
# remainder left over by floor division.
with tf.device('GPU:0'):
    model.fit(
        train_data_gen,
        steps_per_epoch=len(train_data_gen),
        epochs=EPOCHS,
        validation_data=val_data_gen,
        validation_steps=len(val_data_gen),
    )

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15

KeyboardInterrupt: 