# 1. About CNN

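CNN - Convolutional Neural Network                    
A CNN is a neural network model that is mainly used to process images or image-like data. Its defining feature is the convolution operation, which slides small learned filters over the input to extract local features before classification.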

# 2. About cat & dog dataset

The dataset provides 25,000 images of cats and dogs to classify.

In [None]:
''' importing library''' 

import numpy as np
import sys
import tensorflow as tf
import os
import sys
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator


%matplotlib inline
import matplotlib.image as img
import matplotlib.pyplot as plt

In [None]:
# Pin the random seeds of NumPy and TensorFlow.
# `seed` is also reused further down as the random_state of train_test_split.
seed = 0
tf.random.set_seed(3)
np.random.seed(seed)

In [None]:
import zipfile

# Base names of the input archives; each one is unpacked into the
# current directory and a confirmation line is printed.
zip_files = ['test1', 'train']

for archive_name in zip_files:
    archive_path = "../input/dogs-vs-cats/{}.zip".format(archive_name)
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(".")
        print("{} unzipped".format(archive_name))

In [None]:
# List the working folder; the extracted test1 and train data are expected
# to show up here (presumably ../working is the current directory — confirm).
working_dir_entries = os.listdir('../working')
print(working_dir_entries)

In [None]:
# Folder with the extracted training images and the names of its entries.
IMAGE_FOLDER_PATH = "../working/train"
FILE_NAMES = os.listdir(IMAGE_FOLDER_PATH)

# Side lengths used for the generators' target_size and the model's input_shape.
WIDTH, HEIGHT = 150, 150

In [None]:
# Peek at the first five file names.
FILE_NAMES[:5]

In [None]:
# The label of a file is the part of its name before the first ".".
targets = [name.split(".")[0] for name in FILE_NAMES]
full_paths = [os.path.join(IMAGE_FOLDER_PATH, name) for name in FILE_NAMES]

# Per-class path lists, kept in the same order as FILE_NAMES.
train_cats_dir = [path for path, label in zip(full_paths, targets) if label == "cat"]
train_dogs_dir = [path for path, label in zip(full_paths, targets) if label == "dog"]

# One row per file: where the image lives and which label it carries.
dataset = pd.DataFrame({
    'image_path': full_paths,
    'target': targets,
})
    

In [None]:
# Display the first ten rows of the path/label table.
dataset.head(n=10)

In [None]:
# Report how many labelled rows there are in total and per label.
labels = dataset['target']
print("total data counts:", labels.count())
counts = labels.value_counts()
print(counts)

## cat data

In [None]:
# Show the first rows*cols cat images in a rows x cols grid.
rows, cols = 4, 4
axes = []
fig = plt.figure(figsize=(10, 10))

# Subplot positions are 1-based, list indices are 0-based.
for position in range(1, rows * cols + 1):
    picture = img.imread(train_cats_dir[position - 1])
    axes.append(fig.add_subplot(rows, cols, position))
    plt.imshow(picture)  # draws on the subplot that was just added

fig.tight_layout()
plt.show()

## dog data

In [None]:
# Show the first rows*cols dog images in a rows x cols grid.
rows, cols = 4, 4
axes = []
fig = plt.figure(figsize=(10, 10))

# Subplot positions are 1-based, list indices are 0-based.
for position in range(1, rows * cols + 1):
    picture = img.imread(train_dogs_dir[position - 1])
    axes.append(fig.add_subplot(rows, cols, position))
    plt.imshow(picture)  # draws on the subplot that was just added

fig.tight_layout()
plt.show()

# 3. Data preprocessing

## Reason for rescaling

* The brightness of each pixel is a value between 0 and 255.
* Neural networks in Keras train best when the input values lie between 0 and 1.
* Rescaling the pixel values into this range is called data normalization.

In [None]:
# Dump the integer values stored in the first slice (tmp[0]) of the first
# cat image: one output line per element of that slice, values separated
# by tabs.
# NOTE(review): assumes imread returns a (rows, cols, channels) array with
# values in 0..255 — confirm for the image format in use.
tmp = img.imread(train_cats_dir[0]).astype(int)
tmp0 = tmp[0].astype(int)

for pixel in tmp0:
    sys.stdout.write(''.join('%d\t' % value for value in pixel))
    sys.stdout.write('\n')

## data split
To detect overfitting, the data should be split into training data and held-out test (validation) data that the model never sees during training.                   

* [about overfitting](https://en.wikipedia.org/wiki/Overfitting)

In [None]:
# Hold out 20% of the rows for validation; random_state is fixed to `seed`.
dataset_train, dataset_test = train_test_split(
    dataset,
    test_size=0.2,
    random_state=seed,
)

## ImageDataGenerator

When there is little training data, **ImageDataGenerator** can be used to augment it: 
it applies random transformations to the images so that the model sees more varied examples.         
For the **test data**, only rescaling should be applied.

* rescale = 1./255 : scale the pixel values into the range 0 to 1 
* rotation_range = 15 : Random rotation within 15 degrees
* shear_range = 0.1 : shear range 10%
* zoom_range = 0.2 : zoom range 20%
* horizontal_flip = True : Randomly flip horizontally.
* width_shift_range = 0.1 : Randomly move the original image horizontally within 10% of the width
* height_shift_range = 0.1 : Randomly move the original image vertically within 10% of the height

* [about ImageDataGenerator_kor.ver](https://keras.io/ko/preprocessing/image/)
* [about ImageDataGenerator_Image change process_kor.ver](https://tykimos.github.io/2017/06/10/CNN_Data_Augmentation/)            




## flow_from_dataframe

Store the image file paths and their labels in a pandas DataFrame and pass it to the ImageDataGenerator.       
* dataframe: Dataframe must consist of file path and target
* x_col: column in 'dataframe' that contains the filenames
* y_col: column in 'dataframe' that has the target data.
* target_size: image size
* class_mode: "binary" -> binary classification, "categorical" -> categorical classification (the data should be one-hot encoded label)
* batch_size: size of data batch

In [None]:
# Training images are rescaled to 0..1 and randomly augmented
# (rotation, shear, zoom, horizontal flip, horizontal/vertical shift).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# Stream batches of 150 images, reading paths and labels from dataset_train.
# NOTE: Keras documents target_size as (height, width); WIDTH equals HEIGHT
# in this notebook, so the order has no effect here.
train_datagenerator = train_datagen.flow_from_dataframe(
    dataframe=dataset_train,
    x_col="image_path",
    y_col="target",
    target_size=(WIDTH, HEIGHT),
    class_mode="binary",
    batch_size=150,
)

In [None]:
# Held-out images are only rescaled to 0..1; no augmentation is applied.
test_datagen = ImageDataGenerator(rescale=1./255)

# Stream batches of 150 images, reading paths and labels from dataset_test.
test_datagenerator = test_datagen.flow_from_dataframe(
    dataframe=dataset_test,
    x_col="image_path",
    y_col="target",
    target_size=(WIDTH, HEIGHT),
    class_mode="binary",
    batch_size=150,
)

# 4. CNN model

In [None]:
# Small CNN for binary classification: two 3x3 convolution layers, one
# max-pooling step, then a dense head ending in a single sigmoid unit.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), input_shape=(WIDTH, HEIGHT, 3), activation='relu'),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=2),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Print the layer-by-layer overview of the model defined above.
model.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss (matching
# the single sigmoid output), and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss="binary_crossentropy",
    metrics=['accuracy'],
)
print("[INFO]: model compiled...")

In [None]:
# Train for 50 epochs, validating on the held-out generator after each epoch.
#
# The step counts come from len(generator), i.e. the number of batches needed
# to cover the data once. The previous float divisions (rows / 150) were not
# whole numbers, while Keras expects integer step counts; they also repeated
# the batch size that is already configured on the generators.
modelHistory = model.fit(
    train_datagenerator,
    epochs=50,
    steps_per_epoch=len(train_datagenerator),
    validation_data=test_datagenerator,
    validation_steps=len(test_datagenerator),
)

In [None]:
# Per-epoch metrics recorded by model.fit.
history = modelHistory.history
acc, val_acc = history['accuracy'], history['val_accuracy']
loss, val_loss = history['loss'], history['val_loss']

epochs = range(len(acc))

# First figure: accuracy (dots = training, line = validation).
plt.plot(epochs, acc, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()

# Second figure: loss (dots = training, line = validation).
plt.figure()
plt.plot(epochs, loss, 'go', label='Training Loss')
plt.plot(epochs, val_loss, 'g', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

