<a href="https://colab.research.google.com/github/sunwoongc/deeplearning-project/blob/main/futureskill_dogs_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dog breed Classification

## Naive Approach

### 1. Importing modules

In [None]:
import scipy
from scipy import io
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import glob
import cv2

from keras.utils import np_utils

### 2. Explore and load the data

Load the data from scratch.

In [None]:
# Load a single image from disk.
def load_image(filepath):
    """Read an image file and return its pixel data as a NumPy array.

    Args:
        filepath: Path of the image file, handed to ``PIL.Image.open``.

    Returns:
        The ``np.ndarray`` built from the decoded image.
    """
    # The context manager releases the file handle as soon as the pixels have
    # been copied into the array instead of waiting for garbage collection.
    with Image.open(filepath) as image:
        return np.array(image)

# Load the labelling information.
def load_label(filepath):
    """Load the file list and the labels stored in a MATLAB ``.mat`` file.

    Args:
        filepath: Path of the ``.mat`` file.

    Returns:
        The ``'file_list'`` and ``'labels'`` entries of the file, in that
        order, as a 2-tuple.
    """
    contents = scipy.io.loadmat(filepath)
    names = contents['file_list']
    targets = contents['labels']
    return names, targets

Using `glob`, we can list the contents of the dataset directory and explore the data.

In [None]:
# Root folder of the image set; every entry directly below it is counted as
# one class (breed).
file_path = '/content/drive/MyDrive/stanford_dog_dataset/Images/'
files = glob.glob(file_path + '/*')
num_classes = len(files)

# Total number of images = number of entries found inside each class folder.
num_images = sum(len(glob.glob(breed_dir + '/*')) for breed_dir in files)


In [None]:
# Report the totals counted from the directory listing.
print(f'There are total {num_images} images')
print(f'There are total {num_classes} breeds')

### 3. Generating dataset

Since I have both the file list and the label list, I generate the dataset with a custom function. I resize every image to (150, 150) and keep only the first three channels so that each image has shape (150, 150, 3), because some pictures have four channels (R, G, B, A), where A is the alpha channel, which represents transparency.

In [None]:
# MATLAB file holding the image list; load_label returns its 'file_list'
# and 'labels' entries.
label_path = '/content/drive/MyDrive/stanford_dog_dataset/lists/file_list.mat'
file_list, labels = load_label(label_path)

In [None]:
def generate_dataset(file_list, labels, image_root=None, size=(150, 150)):
    """Load every listed image into memory and one-hot encode the labels.

    Args:
        file_list: Nested array of relative image paths as returned by
            ``load_label``; each entry is read as ``entry[0][0]``.
        labels: Class labels aligned with ``file_list``. Each label is
            decremented by one before encoding.
        image_root: Prefix prepended to every relative path. Defaults to the
            notebook-level ``file_path``.
        size: Target size handed to ``cv2.resize`` for every image.

    Returns:
        Tuple ``(X, Y)`` where ``X`` is a float32 array of the resized
        three-channel images and ``Y`` is the result of
        ``np_utils.to_categorical`` on the shifted labels.
    """
    root = file_path if image_root is None else image_root
    X = []
    Y = []
    # `entry` rather than `dir`, so the built-in dir() is not shadowed.
    for entry, label in tqdm(zip(file_list, labels), total=len(labels)):
        image = load_image(root + entry[0][0])
        image = cv2.resize(image, size)
        if image.ndim == 2:
            # Single-channel image: replicate it so that every sample ends up
            # with three channels and the final np.array() stays rectangular.
            image = np.stack([image] * 3, axis=-1)
        image = image[..., :3]  # drop the alpha channel when present
        X.append(image)
        Y.append(label - 1)  # shift so that class ids start at 0
    return np.array(X, dtype=np.float32), np_utils.to_categorical(Y)

It takes a long time because Colab has to load all of the data into its own memory (RAM).

*It can compute on data that's on the memory of GPU - 'FSDL Lecture 6'*

In [None]:
# Build the full image array and the one-hot label matrix in memory.
X, Y = generate_dataset(file_list, labels)

In [None]:
# Sanity-check the shapes, dtype and container types of the generated data.
print(X.shape)
print(X.dtype)
print(type(X))
print(Y.shape)
print(type(Y))

### 4. Data Preprocessing

Here the only preprocessing step is normalization, which divides the images by 255. Because RAM is limited (25.51 GB), I have to normalize the data batch by batch.

The value batch_size=700 comes from dividing the size of the MNIST dataset, 60000 * 28 * 28 * 1, by the size of one image here, 150 * 150 * 3, which gives approximately 697.

In [None]:
def normalize_image_batch(data, batch_size=700):
    """Divide ``data`` by 255 in place, ``batch_size`` samples at a time.

    Working chunk by chunk and writing the result straight back into the
    chunk avoids allocating a second full-size array.

    Args:
        data: Floating-point NumPy array; it is modified in place.
        batch_size: Number of leading-axis entries processed per step.

    Returns:
        The same ``data`` object, now scaled.

    Raises:
        TypeError: If ``data`` is not a floating-point array. Writing the
            quotient back into an integer array would silently truncate it.
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if not np.issubdtype(data.dtype, np.floating):
        raise TypeError(
            f"data must have a floating-point dtype, got {data.dtype}")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    for idx in tqdm(range(0, len(data), batch_size)):
        chunk = data[idx:idx + batch_size]  # basic slice -> view on `data`
        np.divide(chunk, 255., out=chunk)
    return data

In [None]:
# Divide the pixel values by 255, processing 700 images per step.
X = normalize_image_batch(X, 700)

### 5. Visualize some data

To show that my preprocessing works, I'll show some of the transformed images.

In [None]:
def visualize_data(n: int = 4) -> None:
    """Plot ``n`` randomly chosen images from the global ``X`` array.

    Each panel is titled with the index at which the matching row of the
    global ``Y`` equals 1. Panels are laid out in two columns.

    Args:
        n: Number of images to draw; must be at least 1.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    n_rows = (n + 1) // 2  # round up so that an odd n still gets enough panels
    # squeeze=False keeps `axs` two-dimensional even when there is one row.
    fig, axs = plt.subplots(nrows=n_rows, ncols=2, figsize=(5, 8), squeeze=False)
    axes = axs.ravel()
    for i in range(n):
        num = np.random.randint(0, len(X))
        axes[i].imshow(X[num])
        axes[i].set_title(np.where(Y[num] == 1)[0])
    # Hide the spare panel that appears when n is odd.
    for spare in axes[n:]:
        spare.axis('off')
    plt.tight_layout()

In [None]:
# Show two random samples.
visualize_data(2)

### 6. Train-test-split

For training and inference, I have to split the data. scikit-learn provides `train_test_split` in `sklearn.model_selection`. I wrote my own split function, but the library function could be used as well. Either method can run into memory problems on a dataset of this size.

In [None]:
def my_train_test_split(x, y, split_ratio=0.2, shuffle=True):
    """Split ``x`` and ``y`` along their first axis into train and test parts.

    Args:
        x: NumPy array of samples (any number of dimensions).
        y: NumPy array of targets with the same length as ``x``.
        split_ratio: Fraction of the samples assigned to the test part;
            must lie in ``[0, 1]``.
        shuffle: When true, samples are drawn in a random order taken from
            ``np.random.permutation``; otherwise the original order is kept.

    Returns:
        ``((x_train, y_train), (x_test, y_test))``. With ``shuffle=False`` the
        parts are slices (views) of the inputs, so no data is copied.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]`` or the lengths of
            ``x`` and ``y`` differ.
    """
    if not 0 <= split_ratio <= 1:
        raise ValueError("split_ratio must be between 0 and 1")
    num_data = x.shape[0]
    if len(y) != num_data:
        raise ValueError("x and y must contain the same number of samples")
    split = int(num_data * (1 - split_ratio))

    if shuffle:
        # Index each part separately instead of materialising a fully
        # shuffled copy of x first; this halves the peak extra memory.
        idx = np.random.permutation(num_data)
        train_sel, test_sel = idx[:split], idx[split:]
    else:
        train_sel, test_sel = slice(None, split), slice(split, None)

    # Indexing only the first axis also works for 1-D inputs.
    return (x[train_sel], y[train_sel]), (x[test_sel], y[test_sel])

In [None]:
# Hold out 20% of the samples as the test set.
(x_train, y_train), (x_test, y_test) = my_train_test_split(X, Y, split_ratio=0.2)

In [None]:
# Confirm the sizes of the two parts.
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

### 7. Train the model

Here I build a CNN-based architecture, although it is essentially a copy of the architecture I previously used for the MNIST classification task.

In [None]:
from tensorflow.keras.layers import *
from tensorflow.keras import Sequential
import tensorflow as tf

# One tuple per convolutional stage:
# (filters, number of stacked 3x3 convolutions, Dropout(0.5) after pooling?)
_conv_stages = (
    (64, 2, True),
    (64, 2, True),
    (128, 2, True),
    (256, 1, False),
)

model = Sequential()
model.add(tf.keras.Input(shape=x_train.shape[1:]))

for _filters, _depth, _with_dropout in _conv_stages:
    for _ in range(_depth):
        model.add(tf.keras.layers.Conv2D(_filters, 3, activation='relu', padding='same'))
    # Each stage ends with a 2x2 max-pooling step.
    model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
    if _with_dropout:
        model.add(Dropout(0.5))

# Classifier head: two fully connected layers, then one softmax unit per
# column of the one-hot label matrix.
model.add(Flatten())
model.add(Dense(512, activation='relu', kernel_regularizer='l2'))
model.add(Dense(1024, activation='relu'))
model.add(Dense(y_train.shape[-1], activation='softmax'))

In [None]:
# Print the layer stack of the model built above.
model.summary()

In [None]:
# Categorical cross-entropy pairs with the one-hot labels and softmax output.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# NOTE(review): the held-out split is passed as validation_data, so the
# "val_*" curves and the later test evaluation use the same samples.
metric_history_1 = model.fit(x_train, y_train, epochs=30, batch_size=512, validation_data=(x_test, y_test))

### 8. Loss/Accuracy Graph

I plot the loss and accuracy curves with matplotlib, and check the accuracy by counting the number of correctly classified items. The results are **POOR**.

In [None]:
# Inspect what the Keras History object recorded: the metric names, the
# container type, and the type used for the 'loss' series.
print(metric_history_1.history.keys())
print(type(metric_history_1.history))
print(type(metric_history_1.history['loss']))

print(metric_history_1.history.items())

In [None]:
# Per-epoch curves recorded during the first training run.
train_acc = metric_history_1.history['accuracy']
val_acc = metric_history_1.history['val_accuracy']
train_loss = metric_history_1.history['loss']
val_loss = metric_history_1.history['val_loss']

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8, 6))
axs = axes.ravel()

# Top panel: accuracy, bottom panel: loss. Training curves are dashed blue,
# validation curves solid red.
panels = (
    (axs[0], 'Accuracy', (train_acc, 'train_acc'), (val_acc, 'val_acc')),
    (axs[1], 'Loss', (train_loss, 'train_loss'), (val_loss, 'val_loss')),
)
for ax, title, (train_curve, train_label), (val_curve, val_label) in panels:
    ax.plot(train_curve, label=train_label, ls='--', color='dodgerblue', zorder=2)
    ax.plot(val_curve, label=val_label, ls='-', color='tomato', zorder=2)
    ax.set_title(title)
    ax.legend()

# Shade the accuracy band below 0.1 and the epoch range 6.0-6.5 on the loss
# panel.
axs[0].axhspan(ymin=0, ymax=0.1, facecolor='orangered', alpha=0.2)
axs[1].axvspan(xmin=6.0, xmax=6.5, facecolor='orangered', alpha=0.2)

plt.show()

In [None]:
# Run inference once and reuse the probabilities; the second predict() call
# repeated the full forward pass over the test set for the same result.
y_pred = model.predict(x_test)
# Convert the most probable class of every sample to a one-hot row.
y_pred_label = np_utils.to_categorical(np.argmax(y_pred, axis=-1), num_classes)

In [None]:
# A sample counts as correct when its one-hot prediction equals its one-hot
# label in every position.
correctd_image = int(np.count_nonzero(np.all(y_pred_label == y_test, axis=1)))

In [None]:
# Report how many test samples were classified correctly.
print(f'Poorly Result: Only {correctd_image} are correct out of {len(y_pred)}')

## Transfer Learning without data preprocessing (transfer learning combined with the naive approach above)

I expected that simply swapping in a pre-trained model and repeating the task above would be enough, so I don't use any model-specific preprocessing and just borrow the **VGG16** model for the same approach as above.

In [None]:
from keras.applications.vgg16 import VGG16#, preprocess_input

In [None]:
# ImageNet-pretrained VGG16 without its classifier head; all of its layers
# are frozen so that only the new head below is trained.
base_model = VGG16(weights='imagenet',
                   include_top=False,
                   input_shape=(150, 150, 3))
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# New head: global average pooling, dropout, two fully connected layers and
# one softmax unit per class.
model_2 = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(2048, activation='relu'),
    Dense(4096, activation='relu'),
    Dense(num_classes, activation='softmax'),
])
model_2.summary()

In [None]:
# Same optimizer, loss and metric as the naive model; trained on the same
# in-memory arrays for 15 epochs.
model_2.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
metric_history_2 = model_2.fit(x_train, y_train, batch_size=512, epochs=15, validation_data=(x_test, y_test))

In [None]:
# Per-epoch curves recorded while training the VGG16-based model.
train_acc = metric_history_2.history['accuracy']
val_acc = metric_history_2.history['val_accuracy']
train_loss = metric_history_2.history['loss']
val_loss = metric_history_2.history['val_loss']

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8, 6))
axs = axes.ravel()

# Top panel: accuracy, bottom panel: loss. Training curves are dashed blue,
# validation curves solid red.
panels = (
    (axs[0], 'Accuracy', (train_acc, 'train_acc'), (val_acc, 'val_acc')),
    (axs[1], 'Loss', (train_loss, 'train_loss'), (val_loss, 'val_loss')),
)
for ax, title, (train_curve, train_label), (val_curve, val_label) in panels:
    ax.plot(train_curve, label=train_label, ls='--', color='dodgerblue', zorder=2)
    ax.plot(val_curve, label=val_label, ls='-', color='tomato', zorder=2)
    ax.set_title(title)
    ax.legend()

# Reference marks on the accuracy panel: shaded band below 0.1 and two
# horizontal guide lines at 0.36 and 0.25.
axs[0].axhspan(ymin=0, ymax=0.1, facecolor='orangered', alpha=0.2)
axs[0].axhline(y=0.36, ls='--', color='dodgerblue')
axs[0].axhline(y=0.25, ls='--', color='tomato')

# NOTE(review): this span sits at x=60 although the run above trains for 15
# epochs, so it stretches the x-axis far past the curves - confirm intent.
axs[1].axvspan(xmin=60 , xmax=60.5, facecolor='orangered', alpha=0.2)

plt.show()

It has almost the same **poor** result.

## Using ImageDataGenerator to load and generate dataset

### Using `flow_from_directory` method

By using the `flow_from_directory` method, we don't have to use the `generate_dataset` function anymore. The `flow_from_directory` method returns a `DirectoryIterator` yielding tuples of `(x, y)`, where `x` is a numpy array containing a batch of images with shape `(batch_size, *target_size, channels)`, and `y` is a numpy array of the corresponding labels.

It is really convenient that we don't have to generate the dataset explicitly, since it generates **batches of augmented data** on the fly.

#### 1. Transfer Learning - VGG16

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import *

Since I'm going to use a pre-trained model, it is good to use its own preprocessing method. Setting `preprocessing_function = preprocess_input` takes that role, where `preprocess_input` is imported from `keras.applications.vgg16`. This can be applied to other pre-trained models as well.

We could simply use a rescaling factor only, e.g. `X = X / 255.`, but for better training performance it is recommended to use `preprocess_input`.

`preprocess_input` has parameter `mode`, and it is one of "caffe", "tf" or "torch" and each mode has different method of preprocessing. [(docs)](https://github.com/keras-team/keras/blob/master/keras/applications/imagenet_utils.py)

In [None]:
# Generator with light random augmentation (shear, zoom, brightness,
# horizontal flip) followed by the model-specific preprocess_input.
# validation_split reserves 20% of the files for the 'validation' subset.
# Every iterator created from this object, whichever subset it requests,
# receives the random augmentations configured here.
datagen = ImageDataGenerator(
    # rescale=1./255,  (not used: preprocess_input handles the scaling)
    shear_range=0.1,
    zoom_range=0.1,
    brightness_range=[0.9, 1.1],
    horizontal_flip=True,
    preprocessing_function = preprocess_input,
    validation_split=.2)

In [None]:
# Image root folder read by the flow_from_directory cells.
# NOTE: this name shadows the built-in dir() for the rest of the session.
dir = '/content/drive/MyDrive/stanford_dog_dataset/Images/'

In [None]:
# Shuffled training iterator: 100x100 RGB images in batches of 128 with
# one-hot ('categorical') labels, taken from the 'training' subset.
train_generator = datagen.flow_from_directory(
    directory=dir,
    target_size=(100, 100),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=128,
    shuffle=True,
    subset='training'
    )

In [None]:
# Validation images must not be randomly sheared, zoomed or flipped, or the
# validation metrics become noisy. They therefore come from a generator that
# applies only preprocess_input. validation_split has to stay equal to the
# value used for the training generator so that both select complementary
# files.
test_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=.2,
).flow_from_directory(
    directory=dir,
    target_size=(100, 100),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=128,
    shuffle=False,  # keep file order so predictions line up with .classes
    subset='validation'
    )

In [None]:
# ImageNet-pretrained VGG16 backbone for 100x100 RGB inputs, without its
# classifier head; every backbone layer is frozen.
base_model = VGG16(include_top=False,
                   input_shape = (100, 100, 3),
                   weights='imagenet')
for layer in base_model.layers:
    layer.trainable = False

# New head: pooling -> BatchNorm -> Dense(1024, ReLU) -> BatchNorm -> softmax.
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(BatchNormalization())
# The hidden layer needs a non-linearity; without one the two Dense layers
# collapse into a single linear map. ReLU matches the heads used for the
# EfficientNet and ResNet50 models in this notebook.
model.add(Dense(1024, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(120, activation='softmax'))
model.summary()

In [None]:
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
opt = Adam(learning_rate=0.0001)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [None]:
# Train for 10 epochs on the training iterator and evaluate on the
# validation iterator after each epoch.
history = model.fit(train_generator, 
                    validation_data = test_generator,
                    epochs=10)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded during the generator-based VGG16 run.
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
train_loss = history.history['loss']
val_loss = history.history['val_loss']

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8, 6))
axs = axes.ravel()

# Top panel: accuracy, bottom panel: loss. Training curves are dashed blue,
# validation curves solid red.
panels = (
    (axs[0], 'Accuracy', (train_acc, 'train_acc'), (val_acc, 'val_acc')),
    (axs[1], 'Loss', (train_loss, 'train_loss'), (val_loss, 'val_loss')),
)
for ax, title, (train_curve, train_label), (val_curve, val_label) in panels:
    ax.plot(train_curve, label=train_label, ls='--', color='dodgerblue', zorder=2)
    ax.plot(val_curve, label=val_label, ls='-', color='tomato', zorder=2)
    ax.set_title(title)
    ax.legend()

plt.show()

It doesn't achieve very high accuracy because I only trained it for 10 epochs, but the trend looks reasonable.

#### Transfer Learning - EfficientNet

In [None]:
from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input

In [None]:
# Same augmentation settings as before; preprocess_input now refers to the
# EfficientNet version imported in the previous cell.
# validation_split reserves 20% of the files for the 'validation' subset.
datagen = ImageDataGenerator(
    # rescale=1./255,  (not used: preprocess_input is applied instead)
    shear_range=0.1,
    zoom_range=0.1,
    brightness_range=[0.9, 1.1],
    preprocessing_function = preprocess_input,
    horizontal_flip=True,
    validation_split=.2)

# Unused alternative: a separate, augmentation-free generator for validation.
# test_datagen = ImageDataGenerator(rescale=1./255, validation_split=.2)

In [None]:
# Image root folder read by the flow_from_directory cells.
# NOTE: this name shadows the built-in dir() for the rest of the session.
dir = '/content/drive/MyDrive/stanford_dog_dataset/Images/'

In [None]:
# Shuffled training iterator: 224x224 images in batches of 128 with one-hot
# ('categorical') labels, taken from the 'training' subset.
train_generator = datagen.flow_from_directory(
    directory=dir,
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=128,
    shuffle=True,
    subset='training'
    )

In [None]:
# Validation images must not be randomly sheared, zoomed or flipped, or the
# validation metrics become noisy. They therefore come from a generator that
# applies only preprocess_input. validation_split has to stay equal to the
# value used for the training generator so that both select complementary
# files.
test_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=.2,
).flow_from_directory(
    directory=dir,
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=128,
    shuffle=False,  # keep file order so predictions line up with .classes
    subset='validation'
    )

In [None]:
# ImageNet-pretrained EfficientNetB0 backbone without its classifier head.
base_model = EfficientNetB0(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
# Freeze every backbone layer so that only the head defined below is trained.
for layer in base_model.layers:
    layer.trainable=False

# Head built with the functional API:
# pooling -> BatchNorm -> Dense(1280, ReLU) -> BatchNorm -> 120-way softmax.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)
x = Dense(1280, activation='relu')(x)
x = BatchNormalization()(x)
predictions = Dense(120, activation='softmax')(x)

In [None]:
from tensorflow.keras.models import *
# Join the frozen backbone input to the new softmax output.
model = Model(inputs=base_model.input, outputs=predictions)
# Disabled experiment: explicitly mark the last two layers as trainable.
# for layer in model.layers[-2:]: ## added
#     layer.trainable=True

In [None]:
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
optimizer = Adam(learning_rate=0.0001)
loss = 'categorical_crossentropy'

In [None]:
# Print the layer stack of the assembled model.
model.summary()

In [None]:
# List every layer with its index and trainable flag to verify the freezing.
for i, layer in enumerate(model.layers):
    print(i, layer.name, layer.trainable)

In [None]:
# Compile with the optimizer and loss defined in the cells above.
model.compile(optimizer=optimizer,
              loss=loss,
              metrics=['accuracy'])

In [None]:
# Train for 15 epochs; one epoch is one full pass over each iterator
# (len(iterator) batches).
history_2 = model.fit(train_generator,
                      steps_per_epoch = len(train_generator), 
                      validation_data = test_generator,
                      validation_steps = len(test_generator),
                      epochs=15)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded during the EfficientNetB0 run.
train_acc = history_2.history['accuracy']
val_acc = history_2.history['val_accuracy']
train_loss = history_2.history['loss']
val_loss = history_2.history['val_loss']

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8, 6))
axs = axes.ravel()

# Top panel: accuracy, bottom panel: loss. Training curves are dashed blue,
# validation curves solid red.
panels = (
    (axs[0], 'Accuracy', (train_acc, 'train_acc'), (val_acc, 'val_acc')),
    (axs[1], 'Loss', (train_loss, 'train_loss'), (val_loss, 'val_loss')),
)
for ax, title, (train_curve, train_label), (val_curve, val_label) in panels:
    ax.plot(train_curve, label=train_label, ls='--', color='dodgerblue', zorder=2)
    ax.plot(val_curve, label=val_label, ls='-', color='tomato', zorder=2)
    ax.set_title(title)
    ax.legend()

plt.show()

#### Transfer Learning - ResNet50
The experiment above made me realize that not all 15 epochs are needed. What if the training accuracy (and the validation accuracy) reaches about 95% after just 3 epochs? Then all the remaining epochs are redundant.

So I decide to put some **Callbacks**.

In [None]:
# Mount Google Drive under /content/gdrive so that files stored there can be
# read and written from this notebook.
from google.colab import drive

drive.mount('/content/gdrive')


In [None]:
# NOTE(review): this relative path uses 'My Drive' while the other cells use
# the absolute '/content/gdrive/MyDrive/...' form, and root_path is not read
# by any of the cells shown here - confirm whether it is still needed.
root_path = 'gdrive/My Drive/stanford_dog_dataset/'

In [None]:
from tensorflow.keras.applications.resnet50 import decode_predictions, ResNet50, preprocess_input
from tensorflow.keras.callbacks import *
from tensorflow.keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import *

In [None]:
# Write the model weights (not the full model) to Drive, overwriting the file
# only when val_accuracy exceeds the best value seen so far.
checkpoint_filepath = '/content/gdrive/MyDrive/ResNet50_dog_breed_classifier.h5'
checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    verbose=1,
    save_weights_only=True,
    mode='max',
    save_best_only=True
)

In [None]:
import datetime as dt
# Log once per epoch into a timestamped folder on Drive so that separate runs
# do not overwrite each other; histograms and the graph are not written.
tensorboard = TensorBoard(log_dir="/content/gdrive/MyDrive/logs-"+dt.datetime.now().strftime("%Y%m%d%H%M%S"),
                          histogram_freq=0,
                          write_graph=False,
                          update_freq='epoch')

In [None]:
# Stop when validation accuracy has not improved by at least 1e-4 for 8
# consecutive epochs and roll back to the best weights.
# - Monitoring 'val_accuracy' (instead of training accuracy) stops on lack of
#   generalisation progress and matches the other callbacks in this notebook.
# - The patience must be smaller than the number of training epochs (15) for
#   the callback to be able to fire at all, and larger than the
#   ReduceLROnPlateau patience (5) so that a learning-rate reduction gets a
#   chance to help first.
earlystop = EarlyStopping(monitor='val_accuracy',
                          min_delta=0.0001,
                          patience=8,
                          verbose=1,
                          mode='max',
                          baseline=None,
                          restore_best_weights=True)

In [None]:
import numpy as np
# Multiply the learning rate by sqrt(0.1) whenever val_accuracy has not
# improved by at least 1e-4 for 5 epochs; never go below 1e-7.
reducelr = ReduceLROnPlateau(monitor='val_accuracy',
                             factor=np.sqrt(.1),
                             patience=5,
                             verbose=1,
                             mode='auto',
                             min_delta=0.0001,
                             cooldown=0,
                             min_lr=0.0000001)

In [None]:
def _current_learning_rate():
    """Return the present learning rate of the global ``model``'s optimizer."""
    # `learning_rate` is the canonical attribute; `lr` is a deprecated alias.
    return K.eval(model.optimizer.learning_rate)


def epoch_begin(epoch, logs=None):
    """Print the learning rate at the start of every epoch."""
    print(f"Learning rate is: {_current_learning_rate()}")


def epoch_end(epoch, logs=None):
    """Print the epoch index passed by Keras and the learning rate."""
    print(f"End of epoch {epoch}, Learning rate is: {_current_learning_rate()}")


def train_begin(logs=None):
    """Announce the start of training."""
    print("Training begins")


# Wire the three hooks into one Keras callback.
lambdacallback = LambdaCallback(on_epoch_begin=epoch_begin,
                                on_epoch_end=epoch_end,
                                on_train_begin=train_begin)

In [None]:
# Same augmentation settings as before; preprocess_input now refers to the
# ResNet50 version imported above.
# validation_split reserves 20% of the files for the 'validation' subset.
datagen = ImageDataGenerator(
    # rescale=1./255,  (not used: preprocess_input is applied instead)
    shear_range=0.1,
    zoom_range=0.1,
    brightness_range=[0.9, 1.1],
    preprocessing_function = preprocess_input,
    horizontal_flip=True,
    validation_split=.2)

# Unused alternative: a separate, augmentation-free generator for validation.
# test_datagen = ImageDataGenerator(rescale=1./255, validation_split=.2)

In [None]:
# Image root folder under the Drive mount point used in this section.
# NOTE: this name shadows the built-in dir() for the rest of the session.
dir = '/content/gdrive/MyDrive/stanford_dog_dataset/Images'

In [None]:
# Shuffled training iterator: 224x224 images in batches of 128 with one-hot
# ('categorical') labels, taken from the 'training' subset.
train_generator = datagen.flow_from_directory(
    directory=dir,
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=128,
    shuffle=True,
    subset='training'
    )

In [None]:
# Validation images must not be randomly sheared, zoomed or flipped, or the
# validation metrics become noisy. They therefore come from a generator that
# applies only preprocess_input. validation_split has to stay equal to the
# value used for the training generator so that both select complementary
# files.
test_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=.2,
).flow_from_directory(
    directory=dir,
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=128,
    shuffle=False,  # keep file order so predictions line up with .classes
    subset='validation'
    )

In [None]:
# ImageNet-pretrained ResNet50 backbone without its classifier head.
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
# Freeze every backbone layer so that only the head defined below is trained.
for layer in base_model.layers:
    layer.trainable=False

# Head built with the functional API:
# pooling -> BatchNorm -> Dense(1280, ReLU) -> BatchNorm -> 120-way softmax.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)
x = Dense(1280, activation='relu')(x)
x = BatchNormalization()(x)
predictions = Dense(120, activation='softmax')(x)

In [None]:
from tensorflow.keras.models import *
# Join the frozen backbone input to the new softmax output.
model = Model(inputs=base_model.input, outputs=predictions)
# Disabled experiment: explicitly mark the last two layers as trainable.
# for layer in model.layers[-2:]: ## added
#     layer.trainable=True

In [None]:
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
optimizer = Adam(learning_rate=0.0001)
loss = 'categorical_crossentropy'

In [None]:
# Print the layer stack of the assembled model.
model.summary()

In [None]:
# List every layer with its index and trainable flag to verify the freezing.
for i, layer in enumerate(model.layers):
    print(i, layer.name, layer.trainable)

In [None]:
# Compile with the optimizer and loss defined in the cells above.
model.compile(optimizer=optimizer,
              loss=loss,
              metrics=['accuracy'])

In [None]:
# Train for up to 15 epochs with the learning-rate schedule, early stopping,
# learning-rate printing, TensorBoard logging and checkpointing callbacks.
history_3 = model.fit(train_generator,
                      validation_data = test_generator,
                      epochs=15,
                      callbacks=[reducelr, earlystop, lambdacallback, tensorboard, checkpoint])

In [None]:
# Per-epoch curves recorded during the ResNet50 run.
train_acc = history_3.history['accuracy']
val_acc = history_3.history['val_accuracy']
train_loss = history_3.history['loss']
val_loss = history_3.history['val_loss']

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8, 6))
axs = axes.ravel()

# Top panel: accuracy, bottom panel: loss. Training curves are dashed blue,
# validation curves solid red.
panels = (
    (axs[0], 'Accuracy', (train_acc, 'train_acc'), (val_acc, 'val_acc')),
    (axs[1], 'Loss', (train_loss, 'train_loss'), (val_loss, 'val_loss')),
)
for ax, title, (train_curve, train_label), (val_curve, val_label) in panels:
    ax.plot(train_curve, label=train_label, ls='--', color='dodgerblue', zorder=2)
    ax.plot(val_curve, label=val_label, ls='-', color='tomato', zorder=2)
    ax.set_title(title)
    ax.legend()

plt.show()

### Kaggle's help

In [None]:
import numpy as np
import matplotlib.image as mpimg
from PIL import Image
import matplotlib.pyplot as plt

# Pick one random file from the validation iterator and display it.
image_num = np.random.randint(0, test_generator.samples)
name = test_generator.filepaths[image_num]
plt.imshow(mpimg.imread(name))

# Force three RGB channels so that images stored with an alpha channel or in
# grayscale still match the model's (224, 224, 3) input, and close the file
# once the resized copy exists.
with Image.open(name) as raw:
    img = raw.convert('RGB').resize((224, 224))
probabilites = model.predict(preprocess_input(np.expand_dims(np.asarray(img), axis=0)))
# (class index, class name) pairs, positioned by class index.
breed_list = tuple(zip(test_generator.class_indices.values(), test_generator.class_indices.keys()))

# Print the five most probable classes, most probable first.
for i in probabilites[0].argsort()[-5:][::-1]:
    print(probabilites[0][i], ": ", breed_list[i])

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Restart the validation iterator before predicting over all of its batches.
test_generator.reset()
predictions = model.predict(test_generator, steps=len(test_generator))
# Most probable class index for every validation sample.
y = np.argmax(predictions, axis=1)

# Class names in the order of the class_indices mapping.
class_names = list(test_generator.class_indices)

print('Classification Report')
cr = classification_report(y_true=test_generator.classes,
                           y_pred=y,
                           target_names=class_names)
print(cr)

In [None]:
import pandas as pd
import seaborn as sns

print('Confusion Matrix')

# Label both axes with the class names so that rows (true class) are as
# readable as columns (predicted class).
class_names = list(test_generator.class_indices)
cm = confusion_matrix(test_generator.classes, y)
df = pd.DataFrame(cm, index=class_names, columns=class_names)
plt.figure(figsize=(80,80))
# fmt='d' prints the integer counts as-is; the default format switches to
# scientific notation for larger values.
sns.heatmap(df, annot=True, fmt='d')