Things to learn in this section:

✔️ Build and train models to process real-world image datasets.  
✔️ Use real-world images in different shapes and sizes.  
✔️ Use image augmentation to prevent overfitting.  
✔️ Use ImageDataGenerator.  
✔️ Understand how ImageDataGenerator labels images based on the directory structure.  

In [1]:
# run only once
# (presumably because every execution opens another InteractiveSession — TODO confirm)
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Build a TF1-style session configuration and switch on `allow_growth` for the
# GPU options; see the TensorFlow GPU guide ("Limiting GPU memory growth") for
# what this setting does.
config = ConfigProto()
config.gpu_options.allow_growth = True
# Open the interactive session with that configuration applied.
session = InteractiveSession(config=config)

In [82]:
import os
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop

## Dataset

In [109]:
from pathlib import Path

# The dataset root lives below the current user's home directory.
home = os.fspath(Path.home())
base_dir = os.path.join(home, 'datasets/dogs-vs-cats')

In [110]:
# Location of the downloaded archive and of the folder it is expected to
# unpack into, both directly below `base_dir`.
path_to_zip_file, directory_to_extract_to = (
    os.path.join(base_dir, entry) for entry in ('train.zip', 'train')
)

In [111]:
# unzip files
import zipfile

# Extraction is skipped when the target folder is already present, so the
# archive is only unpacked the first time this cell runs.
already_extracted = os.path.exists(directory_to_extract_to)
if not already_extracted:
    with zipfile.ZipFile(path_to_zip_file, mode='r') as archive:
        # The archive is unpacked into `base_dir`, not into the target folder.
        archive.extractall(path=base_dir)

In [112]:
# List the image files of the extracted folder and split them by class.
#
# Only regular files are kept: once the split cells below have run, this folder
# also contains the 'cats' and 'dogs' sub-directories, and their names would
# otherwise match the 'cat' / 'dog' substring test and end up in the file lists.
# Sorting removes the dependence on the arbitrary order of os.listdir().
all_files = sorted(
    f for f in os.listdir(directory_to_extract_to)
    if os.path.isfile(os.path.join(directory_to_extract_to, f))
)
cat_files = [f for f in all_files if 'cat' in f]
dog_files = [f for f in all_files if 'dog' in f]

In [113]:
# A quarter of the cat images (rounded down) is held out for validation; the
# same count is reused for the dog images further below.
val_size = len(cat_files) // 4

## Split Data into Train and Validation

In [114]:
import random

# Fixed seed so that the train/validation split is the same on every run.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

# Sort first: the lists come from os.listdir(), whose order is arbitrary, and a
# seeded shuffle is only reproducible when it starts from a known order.
cat_files.sort()
dog_files.sort()
split_rng.shuffle(cat_files)
split_rng.shuffle(dog_files)

# The first `val_size` shuffled names of each class form the validation set.
cat_files_validation = cat_files[:val_size]
dog_files_validation = dog_files[:val_size]

## Define Data Directory

    .
    ├── test
    ├── train
    │   ├── cats
    │   └── dogs
    └── validation
        ├── cats
        └── dogs

In [115]:
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')

# Directory with our training cat/dog pictures
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')

# Directory with our validation cat/dog pictures
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')

# Create the class folders. makedirs() also creates the missing parents
# (`train_dir`, `validation_dir`), and exist_ok=True makes the cell safe to
# re-run without a separate existence check.
for class_dir in (validation_cats_dir, validation_dogs_dir,
                  train_cats_dir, train_dogs_dir):
    os.makedirs(class_dir, exist_ok=True)

In [119]:
from shutil import move

# cats
# Every cat image that is not part of the validation selection is used for
# training. A set makes the membership test O(1) per file instead of scanning
# the validation list for each name.
cat_validation_names = set(cat_files_validation)
cats_files_training = [f for f in cat_files if f not in cat_validation_names]

# Move each group out of the flat extraction folder into its class folder.
for file_names, dest_dir in ((cat_files_validation, validation_cats_dir),
                             (cats_files_training, train_cats_dir)):
    for file_name in file_names:
        src = os.path.join(train_dir, file_name)
        # Only move regular files that are still in the source folder, so that
        # re-running the cell after the files were moved does not raise.
        if os.path.isfile(src):
            move(src, os.path.join(dest_dir, file_name))

In [120]:
# dogs
# Every dog image that is not part of the validation selection is used for
# training. A set makes the membership test O(1) per file instead of scanning
# the validation list for each name.
dog_validation_names = set(dog_files_validation)
dog_files_training = [f for f in dog_files if f not in dog_validation_names]

# Move each group out of the flat extraction folder into its class folder.
for file_names, dest_dir in ((dog_files_validation, validation_dogs_dir),
                             (dog_files_training, train_dogs_dir)):
    for file_name in file_names:
        src = os.path.join(train_dir, file_name)
        # Only move regular files that are still in the source folder, so that
        # re-running the cell after the files were moved does not raise.
        if os.path.isfile(src):
            move(src, os.path.join(dest_dir, file_name))

In [121]:
# Report the number of entries in each class folder: training cats, training
# dogs, validation cats, validation dogs (one count per line, in that order).
for split_dir in (train_cats_dir, train_dogs_dir,
                  validation_cats_dir, validation_dogs_dir):
    print(len(os.listdir(split_dir)))

9375
9375
3125
3125


## Build a Model

In [122]:
# build model
# Two convolution + max-pooling stages extract features from 64x64 RGB inputs;
# the flattened features go through two dense layers and end in one sigmoid
# unit, i.e. a single probability for the binary cat-vs-dog decision.
model = Sequential([
    Conv2D(8, (3,3), activation='relu', input_shape = (64,64,3)),
    MaxPooling2D((3,3)),

    Conv2D(16, (3,3), activation='relu'),
    MaxPooling2D((3,3)),

    Flatten(),
    Dense(512, activation=tf.nn.relu),
    Dense(128, activation=tf.nn.relu),
    Dense(1, activation=tf.nn.sigmoid)
])

# `learning_rate` is the documented argument name; `lr` is a deprecated alias
# that newer Keras releases no longer accept.
# binary_crossentropy pairs with the single sigmoid output above.
model.compile(optimizer=RMSprop(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics = ['accuracy'])

- __Use ImageDataGenerator__
- __Understand how ImageDataGenerator labels images based on the directory structure.__


In [123]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

- __Use image augmentation to prevent overfitting.__

In [124]:
# Random transformations applied to the training images only.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Both generators multiply the pixel values by 1/255; the validation generator
# does nothing else, so validation images are not augmented.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
validation_datagen = ImageDataGenerator(rescale=1.0/255.)

In [125]:
# Settings shared by both directory iterators. `target_size` matches the
# (64, 64, 3) input shape the model was built with, and class_mode='binary'
# matches its single sigmoid output.
flow_options = dict(
    batch_size=20,
    class_mode='binary',
    target_size=(64,64),
)

# Each iterator reads from a directory that contains one sub-folder per class
# (cats / dogs).
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = validation_datagen.flow_from_directory(validation_dir, **flow_options)

Found 18750 images belonging to 2 classes.
Found 6250 images belonging to 2 classes.


In [126]:
# Training schedule. With the batch size of 20 used by the generators, 100
# training steps and 50 validation steps per epoch draw 2000 and 1000 images
# respectively, i.e. only part of each set per epoch.
EPOCHS = 15
STEPS_PER_EPOCH = 100
VALIDATION_STEPS = 50

history = model.fit(
    train_generator,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=validation_generator,
    validation_steps=VALIDATION_STEPS,
    verbose=2,  # one summary line per epoch
)

Train for 100 steps, validate for 50 steps
Epoch 1/15
100/100 - 49s - loss: 0.7054 - accuracy: 0.5300 - val_loss: 0.6728 - val_accuracy: 0.6110
Epoch 2/15
100/100 - 51s - loss: 0.6833 - accuracy: 0.5510 - val_loss: 0.6620 - val_accuracy: 0.5820
Epoch 3/15
100/100 - 50s - loss: 0.6611 - accuracy: 0.6000 - val_loss: 0.6628 - val_accuracy: 0.5980
Epoch 4/15
100/100 - 51s - loss: 0.6508 - accuracy: 0.6030 - val_loss: 0.6729 - val_accuracy: 0.5540
Epoch 5/15
100/100 - 50s - loss: 0.6541 - accuracy: 0.6200 - val_loss: 0.6116 - val_accuracy: 0.6810
Epoch 6/15
100/100 - 50s - loss: 0.6367 - accuracy: 0.6325 - val_loss: 0.6982 - val_accuracy: 0.5760
Epoch 7/15
100/100 - 51s - loss: 0.6440 - accuracy: 0.6255 - val_loss: 0.6040 - val_accuracy: 0.6830
Epoch 8/15
100/100 - 49s - loss: 0.6354 - accuracy: 0.6375 - val_loss: 0.5866 - val_accuracy: 0.6970
Epoch 9/15
100/100 - 50s - loss: 0.6230 - accuracy: 0.6465 - val_loss: 0.5897 - val_accuracy: 0.6880
Epoch 10/15
100/100 - 51s - loss: 0.6279 - accur