# CNN: Cats And Dogs

## Data Sets

> directory structure
```
./cats_and_dogs
├─test_set
│  ├─cats
│  │  └─cat.{id}.jpg ...
│  └─dogs
│     └─dog.{id}.jpg ...
├─training_set
│  ├─cats
│  │  └─cat.{id}.jpg ...
│  └─dogs
│     └─dog.{id}.jpg ...
└─validation_set
   ├─cats
   │  └─cat.{id}.jpg ...
   └─dogs
      └─dog.{id}.jpg ...
```

## Load Data Sets, Data Augmentation

In [1]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.preprocessing.image import DirectoryIterator

# Root folder that holds the three image splits.
DATA_DIR = 'cats_and_dogs'

# One sub-directory per split; each contains a `cats` and a `dogs` folder.
train_dir, valid_dir, test_dir = (
    os.path.join(DATA_DIR, split_name)
    for split_name in ('training_set', 'validation_set', 'test_set')
)

# Image generator for data augmentation (used for the training images).
img_generator = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    # The brightness factor is drawn uniformly from this range and 1.0 leaves
    # the image unchanged. A degenerate range such as [.2, .2] would darken
    # every image to a fixed 20% brightness instead of varying it, which the
    # un-augmented test images never match.
    brightness_range=[.8, 1.2],
    horizontal_flip=True,
    validation_split=.1
)

# Validation images must not be randomly augmented, otherwise the validation
# metrics are noisy and not comparable with the test metrics. The generator
# keeps the same rescaling and the same validation_split, so that
# subset='validation' below still selects the same files.
valid_generator = ImageDataGenerator(
    rescale=1. / 255,
    validation_split=.1
)

# Generate train data: the 'training' side of the split, augmented on the fly.
train_set: DirectoryIterator = img_generator.flow_from_directory(
    train_dir,
    target_size=(128, 128),
    subset='training'
)

# Generate validation data: the 'validation' side of the split, rescaled only.
valid_set: DirectoryIterator = valid_generator.flow_from_directory(
    valid_dir,
    target_size=(128, 128),
    subset='validation'
)

# Test data: no augmentation, pixel values are only scaled by 1/255.
test_generator = ImageDataGenerator(rescale=1. / 255)

# Iterator over the test images, resized to the same 128x128 size as the
# training and validation images.
test_set: DirectoryIterator = test_generator.flow_from_directory(
    directory=test_dir,
    target_size=(128, 128),
)

Found 7205 images belonging to 2 classes.
Found 800 images belonging to 2 classes.
Found 2023 images belonging to 2 classes.


### Image Generator Parameter
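- `rescale` is used for normalization: every pixel value of the loaded image is multiplied by 1/255.
- `rotation_range` randomly rotates the image by up to 20 degrees.
- `brightness_range` is the range from which a random brightness factor is drawn (values below 1 darken the image, values above 1 brighten it).
- `horizontal_flip` randomly flips the image horizontally.
- `validation_split` specifies the fraction of the images reserved for validation (used together with `subset`).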

**flow_from_directory**
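- the first parameter is the data directory (one sub-folder per class)
- `target_size` is the (height, width) in pixels to which every loaded image is resized
- `subset` selects which side of the `validation_split` partition is loaded (`'training'` or `'validation'`)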

## Configuration of CNN Model

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers


def config_model(model: Sequential, dropout: float = 0.3) -> Sequential:
    """Append the CNN layers to ``model``, print its summary and return it.

    The network consists of four convolution stages followed by a dense
    classifier head with two sigmoid outputs.

    :param model: model the layers are added to (modified in place).
    :param dropout: rate of the Dropout layer placed after the hidden Dense layer.
    :return: the same ``model`` instance that was passed in.
    """
    # Each stage: Conv2D(3x3, relu) -> MaxPooling2D(2, 2) -> BatchNormalization.
    for stage, filters in enumerate((32, 64, 128, 128)):
        conv_options = {'activation': 'relu'}
        if stage == 0:
            # only the first layer declares the input image shape
            conv_options['input_shape'] = (128, 128, 3)
        model.add(layers.Conv2D(filters, (3, 3), **conv_options))
        model.add(layers.MaxPooling2D(2, 2))
        model.add(layers.BatchNormalization())

    # Classifier head: flatten the feature maps, one 512-unit hidden layer
    # with dropout, then 2 outputs.
    head = (
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(dropout),
        layers.Dense(2, activation='sigmoid'),
    )
    for layer in head:
        model.add(layer)

    model.summary()

    return model

In [3]:
# Show the number of images (`samples`) held by the training iterator
train_set.samples

7205

### Model Compile

In [4]:
from tensorflow.keras.optimizers import RMSprop


def compile_model(model: Sequential, learning_rate: float = 0.1) -> Sequential:
    """Compile ``model`` with RMSprop and binary cross-entropy, then return it.

    :param model: model to compile (compiled in place).
    :param learning_rate: learning rate handed to the RMSprop optimizer.
    :return: the same ``model`` instance that was passed in.
    """
    optimizer = RMSprop(learning_rate=learning_rate)
    # accuracy is tracked under the key 'acc'
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['acc'])
    return model

## Training

In [5]:
from tensorflow.python.keras.callbacks import History


def train(model: Sequential, train_data: DirectoryIterator, valid_data: DirectoryIterator, epochs: int = 1) -> History:
    """Fit ``model`` on ``train_data`` and return whatever ``model.fit`` returns.

    :param model: model to train.
    :param train_data: data the model is fitted on.
    :param valid_data: data passed to ``fit`` as ``validation_data``.
    :param epochs: number of epochs passed to ``fit``.
    :return: the result of ``model.fit``.
    """
    fit_options = dict(epochs=epochs, validation_data=valid_data, verbose=1)
    return model.fit(train_data, **fit_options)

## Show Train Result

In [6]:
import matplotlib.pyplot as plt


def show_plot(history: History):
    """Plot the accuracy and loss curves stored in ``history.history``.

    The accuracy curves are drawn on the current figure, the loss curves on
    a newly created one.

    :param history: object whose ``history`` mapping holds the per-epoch
        lists under the keys 'acc', 'val_acc', 'loss' and 'val_loss'.
    """

    def _decorate(title: str, y_label: str) -> None:
        # title, axis labels and legend shared by both charts
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend(loc='upper left')

    logs = history.history
    acc, val_acc = logs['acc'], logs['val_acc']
    loss, val_loss = logs['loss'], logs['val_loss']

    # x axis: one point per recorded epoch, starting at 0
    epochs = range(len(acc))

    # accuracy chart
    plt.plot(epochs, acc, label='train')
    plt.plot(epochs, val_acc, label='valid')
    _decorate('Training and validation accuracy', 'Accuracy')

    # loss chart on a new figure: dots for training, line for validation
    plt.figure()
    plt.plot(epochs, loss, 'bo', label='train')
    plt.plot(epochs, val_loss, 'b', label='valid')
    _decorate('Training and validation loss', 'Loss')


## Evaluate the model

In [7]:
def evaluate_model(model: Sequential, test_data: DirectoryIterator):
    """Evaluate ``model`` on ``test_data``, print loss and accuracy, return the result.

    :param model: model to evaluate.
    :param test_data: data passed to ``model.evaluate``.
    :return: the value returned by ``model.evaluate``; its first two items
        are printed as loss and accuracy.
    """
    metrics = model.evaluate(test_data)
    report = 'loss: {}, acc: {}'.format(*metrics)
    print(report)
    return metrics

## Predict the model
Print the predicted and the true class of the first image in a test batch.

In [8]:
from tensorflow.keras.preprocessing.image import array_to_img
import numpy as np


# print first image
def predict_model(model: Sequential, test_data: DirectoryIterator):
    """Show the first image of the next test batch with its predicted and true class.

    :param model: trained model used for the prediction.
    :param test_data: iterator yielding ``(images, labels)`` batches; the
        labels are decoded with ``argmax``.
    """
    # Index 0 is 'cat' and index 1 is 'dog'.
    # NOTE(review): assumes the iterator's class order is cats, dogs - confirm
    # against test_data.class_indices.
    label = ['cat', 'dog']

    # The built-in next() works on the iterator and does not rely on its
    # `.next()` method.
    batch = next(test_data)
    arr = batch[0][0]

    img = array_to_img(arr).resize((128, 128))
    plt.imshow(img)

    # predict() expects a batch, so add a leading batch axis to the image.
    result = model.predict(np.expand_dims(arr, axis=0))

    # result[0] holds one score per class; a list cannot be indexed with
    # that array, so pick the index of the highest score first.
    print('predict: {}'.format(label[np.argmax(result[0])]))
    print('true: {}'.format(label[np.argmax(batch[1][0])]))

## Script

### Setting Model Parameter
> Currently, the best performing parameters

In [11]:
# Hyper-parameters handed to run(): dropout rate, learning rate, number of epochs
dropout, learning_rate, epoch = 0.3, 0.0007, 30

### Run Script

In [None]:
def run(dr: float, lr: float, ep: int):
    """Build, compile and train a fresh model, then plot, evaluate and predict.

    :param dr: dropout rate passed to ``config_model``.
    :param lr: learning rate passed to ``compile_model``.
    :param ep: number of training epochs passed to ``train``.
    """
    model = compile_model(
        config_model(model=Sequential(), dropout=dr),
        learning_rate=lr,
    )

    # train on the training set, validate on the validation set, plot the curves
    show_plot(train(model, train_data=train_set, valid_data=valid_set, epochs=ep))

    # both final steps work on the test set
    for step in (evaluate_model, predict_model):
        step(model, test_data=test_set)


run(dr=dropout, lr=learning_rate, ep=epoch)



--------------------------------------------------
SEQ  1
dropout:  0.3
learning_rate:  0.0001
epoch:  2
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 64, 64, 16)        448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 32, 32, 16)        0         
_________________________________________________________________
dropout (Dropout)            (None, 32, 32, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 16, 32)        0         
_________________