In [1]:
import os, shutil
from tensorflow import keras

# Show which Keras version this notebook is running against.
print(keras.__version__)

2.2.4-tf


## Prepare data

In [2]:
# Folder that holds the original (full) training images.
original_dataset_dir = 'data/cats_and_dogs/train'
os.makedirs(original_dataset_dir, exist_ok=True)

# The download is slow, so the command is kept commented out.
# !kaggle competitions download -c dogs-vs-cats --path data/cats_and_dogs/train


# Root of the reduced dataset, laid out as <split>/<class>/<file name>.
base_dir = 'data/cats_and_dogs_small'

# (split name, images per class). Each split takes the next consecutive,
# non-overlapping range of image indices: 0-999, 1000-1499, 1500-1999.
split_sizes = (('train', 1000), ('validation', 500), ('test', 500))

start_idx = 0
end_idx = 0
for split, num_img in split_sizes:
    # Slide the [start_idx, end_idx) window forward by this split's size.
    start_idx, end_idx = end_idx, end_idx + num_img

    for target in ('cat', 'dog'):
        new_dir_name = os.path.join(base_dir, split, target)
        os.makedirs(new_dir_name, exist_ok=True)

        # Source files are named "<class>.<index>.jpg"; copy each one under
        # the same name into the split/class folder.
        for idx in range(start_idx, end_idx):
            fname = f'{target}.{idx}.jpg'
            shutil.copyfile(os.path.join(original_dataset_dir, fname),
                            os.path.join(new_dir_name, fname))

        # Report what is actually on disk rather than what was requested.
        num_found = len(os.listdir(new_dir_name))
        print(f'number of images of {target} for {split} : {num_found}')
    print('\n===========================\n')

number of images of cat for train : 1000
number of images of dog for train : 1000


number of images of cat for validation : 500
number of images of dog for validation : 500


number of images of cat for test : 500
number of images of dog for test : 500




## Preprocess data

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training data: pixel values are multiplied by 1/255, images are resized to
# 150x150 and served in batches of 20 with binary labels.
train_dir = os.path.join(base_dir, 'train')
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=(150, 150),
                                                    batch_size=20,
                                                    class_mode='binary')

# Held-out data (validation and test) shares one generator configuration that
# applies the same rescaling as the training data.
validation_dir = os.path.join(base_dir, 'validation')
# `test_dir` was previously used without being defined, which raised a
# NameError on a fresh kernel.
test_dir = os.path.join(base_dir, 'test')
test_datagen = ImageDataGenerator(rescale=1./255)

# Validation split: previously `validation_dir` was computed but never used.
validation_generator = test_datagen.flow_from_directory(validation_dir,
                                                        target_size=(150, 150),
                                                        batch_size=20,
                                                        class_mode='binary')

# Test split: same name and settings as before, now reading from a defined path.
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=(150, 150),
                                                  batch_size=20,
                                                  class_mode='binary')


In [None]:
# Peek at a single batch to confirm the tensor shapes the generator produces.
for data_batch, labels_batch in train_generator:
    print('shape of batch data  :', data_batch.shape)
    print('shape of batch label :', labels_batch.shape)
    # The generator yields batches indefinitely, so stop after the first one;
    # without this the cell never finishes.
    break

## Generate a neural network model

In [6]:
# Convnet for binary classification of 150x150 RGB inputs: four blocks of
# 3x3 convolution (ReLU) followed by 2x2 max pooling, then a dense head that
# ends in a single sigmoid unit.
model = keras.models.Sequential([
    # Convolutional feature extractor
    keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu',
                        input_shape=(150, 150, 3)),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.Conv2D(filters=128, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.Conv2D(filters=128, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),

    # Classifier head
    keras.layers.Flatten(),
    keras.layers.Dense(units=512, activation='relu'),
    keras.layers.Dense(units=1, activation='sigmoid'),
])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 15, 15, 128)      

## Compile, Train, and Save the model

## Visualize the result