# Cat and Dog DataSet

- Cats and Dogs dataset to train a DL model
- [캐글](https://www.kaggle.com)의 [Cat and Dog](https://www.kaggle.com/tongpython/cat-and-dog)


## Import

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import glob

In [None]:
# Pin both random number generators (TensorFlow and NumPy) to the same fixed
# seed so that random draws repeat from run to run.
tf.random.set_seed(42)
np.random.seed(42)

In [None]:
# Colab-specific: mount the user's Google Drive at /content/drive so that the
# dataset archive stored there can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Shell command: quietly (-q) extract the archive from the mounted Drive into
# the ./cat_and_dog directory (-d).
!unzip -q -d 'cat_and_dog' '/content/drive/MyDrive/archive.zip'

In [None]:
# Shell command: list the working directory (presumably to confirm that the
# cat_and_dog directory was created by the extraction above).
!ls

## Load Data

In [None]:
## in training_set directory
training_cats = glob.glob('./cat_and_dog/training_set/training_set/cats/*.jpg')
training_dogs = glob.glob('./cat_and_dog/training_set/training_set/dogs/*.jpg')

print(len(training_cats), len(training_dogs))

In [None]:
## in test_set directory
test_cats = glob.glob('./cat_and_dog/test_set/test_set/cats/*.jpg')
test_dogs = glob.glob('./cat_and_dog/test_set/test_set/dogs/*.jpg')

print(len(test_cats), len(test_dogs))

In [None]:
# Preview row 1: the first four dog images of the training split.
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(22, 6))
dog_images = training_dogs[:4]
for i, ax in enumerate(axes):
    # cv2.imread yields BGR channel order; convert to RGB before plotting.
    bgr = cv2.imread(dog_images[i])
    image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    ax.imshow(image)

# Preview row 2: the first four cat images, drawn on a fresh figure.
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(22, 6))
cat_images = training_cats[:4]
for i, ax in enumerate(axes):
    bgr = cv2.imread(cat_images[i])
    image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    ax.imshow(image)

## Preprocess Data (from dataframe)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import Input, Flatten, Dense
from tensorflow.keras.models import Model

from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dropout, MaxPooling3D, BatchNormalization, Activation

In [None]:
test_cats[:3] # first three glob results for the test-set cats

In [None]:
# The four path lists built by the glob cells. These are bare expressions, so
# in a notebook only the value of the last one is echoed as the cell output.
training_cats
training_dogs
# ---- training lists above, test lists below ----
test_cats
test_dogs

In [None]:
# Let pandas print up to 200 characters per column so long file paths are not cut off.
pd.set_option('display.max_colwidth', 200)

# All cat paths first, followed by all dog paths.
train_paths = [*training_cats, *training_dogs]

# One label per path, in the same order as train_paths.
train_labels = ['CAT'] * len(training_cats) + ['DOG'] * len(training_dogs)

In [None]:
# Tabulate the training images: one row per image with its path and class label.
train_df = pd.DataFrame(dict(path=train_paths, label=train_labels))

In [None]:
# Display a small sample of paths: the first five cats followed by the first five dogs.
training_cats[:5] + training_dogs[:5]

In [None]:
# Number of images delivered per batch by the generators.
BATCH_SIZE = 32

# Training-time pipeline: random horizontal flips for augmentation, and pixel
# values rescaled by a factor of 1/255.
train_generator = ImageDataGenerator(
    rescale=1/255.0,
    horizontal_flip=True,
)

# Stream 224x224 batches from the training directory; class_mode='binary'
# requests binary label encoding.
train_generator_iterator = train_generator.flow_from_directory(
    directory='./cat_and_dog/training_set/training_set',
    class_mode='binary',
    batch_size=BATCH_SIZE,
    target_size=(224, 224),
)

# NOTE: the loss has to match the label encoding chosen here: categorical,
# sparse or binary labels each pair with the corresponding *_crossentropy loss.

In [None]:
# Evaluation-time pipeline: only rescale pixel values by 1/255, with no
# augmentation, so the test images are scored exactly as stored on disk.
test_generator = ImageDataGenerator(rescale=1/255.0)

# Build the iterator from test_generator. Building it from train_generator
# would apply random horizontal flips to the evaluation images and leave
# test_generator unused.
test_generator_iterator = test_generator.flow_from_directory(
    directory='./cat_and_dog/test_set/test_set',
    target_size=(224, 224),
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

* Fetch one batch of data from the training iterator

In [None]:
# Pull a single (images, labels) batch from the training iterator for inspection.
image_array, label_array = next(train_generator_iterator)

In [None]:
# Inspect the shape of the fetched image batch (presumably
# (BATCH_SIZE, 224, 224, 3) given the generator settings; confirm from the output).
image_array.shape

In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dropout, MaxPooling3D, BatchNormalization, Activation, GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D, Dense, Flatten,  MaxPooling2D, Concatenate

In [None]:
def inception_module(x, filters_1x1, filters_3x3_reduce, filters_3x3,
                     filters_5x5_reduce, filters_5x5, filters_pool, name=None):
    """Build one Inception block on top of the tensor ``x``.

    Four parallel branches are applied to the same input and joined along the
    last axis (the channel axis, assuming channels-last data):

    1. a 1x1 convolution,
    2. a 1x1 reduction followed by a 3x3 convolution,
    3. a 1x1 reduction followed by a 5x5 convolution,
    4. a 3x3 stride-1 max pooling followed by a 1x1 projection.

    Every branch uses 'same' padding and stride 1, so the spatial size is kept
    and the output carries
    ``filters_1x1 + filters_3x3 + filters_5x5 + filters_pool`` channels.

    Args:
        x: Input Keras tensor.
        filters_1x1: Filters of the direct 1x1 branch.
        filters_3x3_reduce: Filters of the 1x1 reduction before the 3x3 conv.
        filters_3x3: Filters of the 3x3 convolution.
        filters_5x5_reduce: Filters of the 1x1 reduction before the 5x5 conv.
        filters_5x5: Filters of the 5x5 convolution.
        filters_pool: Filters of the 1x1 projection after max pooling.
        name: Name for the concatenation layer. ``None`` (the default) lets
            Keras generate a unique name. The previous default was the string
            ``'None'``, which gave every unnamed block the same layer name and
            made a model containing two such blocks fail on duplicate names.

    Returns:
        The concatenated output tensor of the four branches.
    """
    # Branch 1: direct 1x1 convolution.
    conv_1x1 = Conv2D(filters_1x1, (1, 1), padding='same', activation='relu')(x)

    # Branch 2: 1x1 reduction applied before the 3x3 convolution.
    conv_3x3 = Conv2D(filters_3x3_reduce, (1, 1), padding='same', activation='relu')(x)
    conv_3x3 = Conv2D(filters_3x3, (3, 3), padding='same', activation='relu')(conv_3x3)

    # Branch 3: 1x1 reduction applied before the 5x5 convolution.
    conv_5x5 = Conv2D(filters_5x5_reduce, (1, 1), padding='same', activation='relu')(x)
    conv_5x5 = Conv2D(filters_5x5, (5, 5), padding='same', activation='relu')(conv_5x5)

    # Branch 4: size-preserving max pooling followed by a 1x1 projection.
    pool = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(x)
    pool = Conv2D(filters_pool, (1, 1), padding='same', activation='relu')(pool)

    # Join the branches on the last axis.
    # axis 0: batch, axis 1: height, axis 2: width, axis 3 (= -1): depth.
    output = Concatenate(axis=-1, name=name)([conv_1x1, conv_3x3, conv_5x5, pool])

    # Six convolutions run inside the block, but callers see a single tensor.
    return output

In [None]:
def build_GoogLeNet():
    """Build a GoogLeNet (Inception v1) style binary classifier.

    The network takes 224x224x3 images, runs the convolutional stem, nine
    inception modules (3a-3b, 4a-4e, 5a-5b) separated by stride-2 max pooling,
    then global average pooling, dropout (0.4) and a single sigmoid unit.

    The 1x1 reduction and the 3x3 convolution of the stem use stride 1, as in
    the GoogLeNet architecture; spatial downsampling is done only by the first
    7x7 convolution and the max-pooling layers. With stride 2 on those two
    convolutions the inception stages would run on 4x4, 2x2 and 1x1 feature
    maps instead of 28x28, 14x14 and 7x7.

    The model summary is printed as a side effect.

    Returns:
        An uncompiled ``Model`` mapping the image input to a sigmoid output.
    """
    input_tensor = Input(shape=(224, 224, 3))

    # Stem: 7x7/2 convolution and 3x3/2 max pooling (224 -> 112 -> 56).
    x = Conv2D(filters=64, kernel_size=(7, 7), padding='same', strides=(2, 2), activation='relu')(input_tensor)
    x = MaxPooling2D(pool_size=(3, 3), padding='same', strides=(2, 2))(x)

    # Stem: 1x1 reduction and 3x3 convolution, both at stride 1 (stays 56x56).
    x = Conv2D(filters=64, kernel_size=(1, 1), padding='same', strides=(1, 1), activation='relu')(x)
    x = Conv2D(filters=192, kernel_size=(3, 3), padding='same', strides=(1, 1), activation='relu')(x)

    # 56 -> 28
    x = MaxPooling2D(pool_size=(3, 3), padding='same', strides=(2, 2))(x)

    # Inception module 3a
    x = inception_module(x, filters_1x1=64, filters_3x3_reduce=96, filters_3x3=128,
                         filters_5x5_reduce=16, filters_5x5=32, filters_pool=32, name='inception_3a')
    # Inception module 3b
    x = inception_module(x, 128, 128, 192, 32, 96, 64, name='inception_3b')

    # 28 -> 14
    x = MaxPooling2D(pool_size=(3, 3), padding='same', strides=(2, 2))(x)

    # Inception module 4a
    x = inception_module(x, 192, 96, 208, 16, 48, 64, name='inception_4a')
    # Inception module 4b
    x = inception_module(x, 160, 112, 224, 24, 64, 64, name='inception_4b')
    # Inception module 4c
    x = inception_module(x, 128, 128, 256, 24, 64, 64, name='inception_4c')
    # Inception module 4d
    x = inception_module(x, 112, 144, 288, 32, 64, 64, name='inception_4d')
    # Inception module 4e
    x = inception_module(x, 256, 160, 320, 32, 128, 128, name='inception_4e')

    # 14 -> 7
    x = MaxPooling2D(pool_size=(3, 3), padding='same', strides=(2, 2))(x)

    # Inception module 5a
    x = inception_module(x, 256, 160, 320, 32, 128, 128, name='inception_5a')
    # Inception module 5b
    x = inception_module(x, 384, 192, 384, 48, 128, 128, name='inception_5b')

    # Classifier head: one sigmoid unit for the binary cat/dog decision.
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.4)(x)
    output = Dense(1, activation="sigmoid")(x)

    model = Model(inputs=input_tensor, outputs=output)
    model.summary()
    return model

In [None]:
# Instantiate the network; build_GoogLeNet also prints the model summary.
model = build_GoogLeNet()

### Compile Model, Train

In [None]:
# Binary classification setup: Adam with its default settings, binary
# cross-entropy loss (matching the 'binary' class_mode labels and the sigmoid
# output), and accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

# Train for 30 epochs on the training iterator and keep the returned history.
history = model.fit(x=train_generator_iterator, epochs=30)

In [None]:
# Score the trained model on the test iterator; with the compile settings used
# in this notebook the result is the loss followed by the accuracy.
model.evaluate(test_generator_iterator)