<a href="https://colab.research.google.com/github/kisuya/dnn_examples/blob/main/Dog_vs_Cat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

References.
* https://colab.research.google.com/github/lmoroney/mlday-tokyo/blob/master/Lab6-Cats-v-Dogs.ipynb#scrollTo=MWZrJN4-65RC
* https://machinelearningmastery.com/how-to-develop-a-convolutional-neural-network-to-classify-photos-of-dogs-and-cats/
* https://tensorflow.google.cn/tutorials/images/transfer_learning_with_hub?hl=ko#%EC%84%A4%EC%B9%98%ED%95%98%EA%B8%B0
* https://www.tensorflow.org/guide/keras/save_and_serialize?hl=ko
* https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html


In [None]:
import sys
import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
from matplotlib import pyplot

In [None]:
# If the URL doesn't work, visit https://www.microsoft.com/en-us/download/confirmation.aspx?id=54765
# And right click on the 'Download Manually' link to get a new URL to the dataset

# Note: This is a very large dataset and will take time to download

# TLS certificate verification is left enabled (no --no-check-certificate):
# the archive is unpacked right after the download, so it must come from the real host.
!wget \
    "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip" \
    -O "/tmp/cats-and-dogs.zip"

# Unpack the downloaded archive into /tmp.
local_zip = '/tmp/cats-and-dogs.zip'
# The context manager closes the archive even when extraction raises
# (e.g. truncated download, full disk), so the file handle never leaks.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp')

In [None]:
# Report how many directory entries were extracted for each class.
for class_dir in ('/tmp/PetImages/Cat/', '/tmp/PetImages/Dog/'):
    print(len(os.listdir(class_dir)))

# Expected Output:
# 12501
# 12501

In [None]:
# Create the <train|test>/<cats|dogs> directory tree used by the split step.
# makedirs(..., exist_ok=True) creates missing parents and tolerates directories
# that already exist, so a re-run (or a partially created tree) still ends up with
# all four leaf directories. Genuine failures such as a permission error are
# raised instead of being silently swallowed.
dataset_root = '/tmp/cats-v-dogs'
for split_name in ('train', 'test'):
    for class_name in ('cats', 'dogs'):
        os.makedirs(os.path.join(dataset_root, split_name, class_name), exist_ok=True)

In [None]:
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly split the non-empty files of SOURCE between TRAINING and TESTING.

    Files are copied (not moved); SOURCE is left untouched.

    Args:
        SOURCE: Directory containing the original files.
        TRAINING: Existing directory that receives the training share.
        TESTING: Existing directory that receives all remaining files.
        SPLIT_SIZE: Fraction of the usable files copied to TRAINING
            (e.g. 0.9 -> 90% training, 10% testing).

    Zero-length files are reported on stdout and excluded from both sets.
    Directory arguments work with or without a trailing path separator.
    """
    files = []
    # Sorted so the sampling input does not depend on filesystem listing order.
    for filename in sorted(os.listdir(SOURCE)):
        if os.path.getsize(os.path.join(SOURCE, filename)) > 0:
            files.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")

    training_length = int(len(files) * SPLIT_SIZE)
    shuffled_set = random.sample(files, len(files))
    training_set = shuffled_set[:training_length]
    # Take the complement of the training slice. A negative slice such as
    # [-testing_length:] returns the WHOLE list when testing_length is 0,
    # which would copy every training file into TESTING as well.
    testing_set = shuffled_set[training_length:]

    for target_dir, selected in ((TRAINING, training_set), (TESTING, testing_set)):
        for filename in selected:
            copyfile(os.path.join(SOURCE, filename),
                     os.path.join(target_dir, filename))

In [None]:
# Source folders (extracted archive) and destination folders (train/test tree).
CAT_SOURCE_DIR = "/tmp/PetImages/Cat/"
TRAIN_CATS_DIR = "/tmp/cats-v-dogs/train/cats/"
TEST_CATS_DIR = "/tmp/cats-v-dogs/test/cats/"
DOG_SOURCE_DIR = "/tmp/PetImages/Dog/"
TRAIN_DOGS_DIR = "/tmp/cats-v-dogs/train/dogs/"
TEST_DOGS_DIR = "/tmp/cats-v-dogs/test/dogs/"

# Fraction of each class copied to the training folders; the rest goes to test.
split_size = .9

# Seed the shuffle so the split is reproducible. Without a seed, re-running this
# cell draws a different sample and copies it on top of the previous one, which
# puts some training images into the test folders as well.
# NOTE(review): the split only repeats exactly if the source files are listed in
# the same order on every run - confirm on the target filesystem.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

split_data(CAT_SOURCE_DIR, TRAIN_CATS_DIR, TEST_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAIN_DOGS_DIR, TEST_DOGS_DIR, split_size)

# Expected output
# 666.jpg is zero length, so ignoring
# 11702.jpg is zero length, so ignoring

In [None]:
# Report the number of files in each split/class folder, in the order:
# train cats, train dogs, test cats, test dogs.
split_dirs = (
    '/tmp/cats-v-dogs/train/cats/',
    '/tmp/cats-v-dogs/train/dogs/',
    '/tmp/cats-v-dogs/test/cats/',
    '/tmp/cats-v-dogs/test/dogs/',
)
for split_dir in split_dirs:
    print(len(os.listdir(split_dir)))

# Expected output:
# 11250
# 11250
# 1250
# 1250

In [None]:
# -y answers apt's confirmation prompt; a notebook cell has no interactive stdin,
# so without it the install aborts whenever apt asks to continue.
!apt-get install -y tree

In [None]:
 # Show only the directories (-d) under /tmp to check the train/test folder layout.
 !tree -d /tmp

In [None]:
# Input dimensions: every image is fed to the network as
# IMG_ROWS x IMG_COLS pixels with IMG_CHANNELS colour channels.
IMG_CHANNELS = 3
IMG_ROWS = 150
IMG_COLS = 150

# Training constants
BATCH_SIZE = 128
EPOCHS = 5

# Small CNN: two conv/max-pool stages, then a dense layer and a single
# sigmoid unit that outputs the probability for the binary cat/dog label.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# `learning_rate` replaces the deprecated `lr` argument, which newer Keras
# releases no longer accept.
# The metric is named 'acc' on purpose: the plotting cell reads the
# 'acc' / 'val_acc' keys of the training history.
model.compile(optimizer=RMSprop(learning_rate=0.001), loss='binary_crossentropy', metrics=['acc'])

In [None]:
TRAIN_DIR = "/tmp/cats-v-dogs/train/"
TEST_DIR = "/tmp/cats-v-dogs/test/"

# Both generators share the same batching, label mode and image size;
# only the source directory differs.
flow_options = dict(batch_size=BATCH_SIZE,
                    class_mode='binary',
                    target_size=(IMG_ROWS, IMG_COLS))

# Pixel values are rescaled by 1/255 for both training and validation images.
train_datagen = ImageDataGenerator(rescale=1.0/255.)
train_generator = train_datagen.flow_from_directory(TRAIN_DIR, **flow_options)

validation_datagen = ImageDataGenerator(rescale=1.0/255.)
validation_generator = validation_datagen.flow_from_directory(TEST_DIR, **flow_options)

# Expected Output:
# Found 22498 images belonging to 2 classes.
# Found 2500 images belonging to 2 classes.

In [None]:
# Display the class-name -> label mapping the training generator derived from the sub-directory names.
train_generator.class_indices

In [None]:
# Train for EPOCHS passes; one epoch walks every training batch once and is
# followed by a full pass over the validation batches.
history = model.fit(train_generator,
                    epochs=EPOCHS,
                    steps_per_epoch=len(train_generator),
                    validation_data=validation_generator,
                    validation_steps=len(validation_generator))

In [None]:
# Score the trained model on all validation batches; evaluate() returns the
# loss followed by the 'acc' metric, and only the accuracy is reported (in %).
val_loss, acc = model.evaluate(validation_generator,
                               steps=len(validation_generator),
                               verbose=0)
print('Test accuracy : %.3f' % (acc * 100.0))

In [None]:
# plot diagnostic learning curves
def summarize_diagnostics(history):
  # plot loss
  # pyplot.subplot(211)
  pyplot.title('Cross Entropy Loss')
  pyplot.plot(history.history['loss'], color='blue', label='train')
  pyplot.plot(history.history['val_loss'], color='orange', label='test')
  pyplot.figure()

  # plot accuracy
  # pyplot.subplot(212)
  pyplot.title('Classification Accuracy')
  pyplot.plot(history.history['acc'], color='blue', label='train')
  pyplot.plot(history.history['val_acc'], color='orange', label='test')
  pyplot.figure()

In [None]:
# Plot the loss and accuracy curves recorded in `history` by model.fit.
summarize_diagnostics(history)

In [None]:
import time
# Whole seconds since the epoch; used to give every export a distinct file name.
t = time.time()

export_file = "/tmp/saved_models/mymodel_{}.hdf5".format(int(t))
# Nothing else in this notebook creates /tmp/saved_models, so create it here
# rather than relying on the saver to do so.
os.makedirs(os.path.dirname(export_file), exist_ok=True)
model.save(export_file)
# Last expression: display the path of the saved model.
export_file

In [None]:
# Reload the model from the file written by model.save to check that the export round-trips.
reconstructed_model = tf.keras.models.load_model(export_file)

In [None]:
# Evaluate the reloaded model on the same validation batches and report its
# accuracy (in %) using the same message format as for the in-memory model.
reloaded_loss, acc = reconstructed_model.evaluate(validation_generator,
                                                  steps=len(validation_generator),
                                                  verbose=0)
print('Test accuracy : %.3f' % (acc * 100.0))