In [None]:
# Mount Google Drive so the dataset stored under drive/MyDrive is reachable.
from google.colab import drive
drive.mount('/content/drive')

import os
import sys
from glob import glob

import math
import random 

import cv2                                 # OpenCV, used to read images
import xml.etree.ElementTree as et         # library for parsing XML files
from matplotlib.patches import Rectangle   # used to draw bounding boxes

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import activations
from tensorflow.keras.applications import EfficientNetB0

import albumentations as A    # if CoarseDropout is not recognized, run the three pip cells below

import tensorflow_hub as hub

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_style('whitegrid')

# Project folder on the mounted drive; appended to sys.path so modules stored
# there can be imported.
drive_project_root = 'drive/MyDrive/data/'
sys.path.append(drive_project_root)

image_root = 'drive/MyDrive/data/images/'
anno_root = 'drive/MyDrive/data/annotations/'

image_dir = image_root
bbox_dir = anno_root + 'xmls/'    # bounding box
seg_dir = anno_root + 'trimaps/'  # segmentation map

# NOTE(review): this exposes only the GPU with index 1. On a runtime with a
# single GPU (index 0) it would presumably hide that GPU and push training to
# the CPU -- confirm this value is intended for the target machine.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

# IPython shell escape: list the current working directory.
!ls

In [None]:
pip install git+https://github.com/albumentations-team/albumentations.git  # Data Augmentation

In [None]:
pip uninstall opencv-python

In [None]:
pip install opencv-python

# Multiclass Classification
- Oxford pet dataset
  - 총 37 종류의 개와 고양이를 구분하는 label 있음 (ID 컬럼)
- 해결책은 이진분류와 크게 다르지 않음
  - 위에서 사용한 EfficientNetB0 코드를 그대로 활용하고자 함

- 각 class에 속하는 data는 200개씩 균등하게 분포되어 있음

In [None]:
# Load the k-fold split table and show which class ids it contains.
csv_path = drive_project_root + 'kfolds.csv'
df = pd.read_csv(csv_path)
class_ids = df['id']
print(np.unique(class_ids))

# Number of rows per class id, ordered by id.
value_counts = class_ids.value_counts().sort_index()

# One bar per class id, with the id itself as the tick label.
bar_positions = range(len(value_counts))
plt.figure(figsize=(10, 5))
plt.bar(bar_positions, value_counts.values)
plt.xticks(bar_positions, value_counts.index.values)
plt.tight_layout()
plt.show()

- binary classification 모델을 multi-class classification에 맞게 수정

In [None]:
def get_model(input_shape, num_classes=37):
    """Build an EfficientNetB0-based multi-class image classifier.

    Args:
        input_shape: Shape of a single input image, e.g. ``(256, 256, 3)``.
        num_classes: Number of output classes. Defaults to 37, the number of
            breeds this notebook classifies.

    Returns:
        An uncompiled ``keras.Model`` that maps a batch of images with pixel
        values in [0, 1] to per-class softmax probabilities.
    """
    inputs = keras.Input(input_shape)

    # The keras.applications EfficientNet models normalise their input
    # internally and expect raw pixel values in [0, 255]. The data pipeline in
    # this notebook feeds images already divided by 255, so scale them back up
    # before the backbone; otherwise the ImageNet weights see a nearly
    # constant input.
    x = layers.Rescaling(255.0)(inputs)

    # Feature extractor: ImageNet-pretrained backbone without its classifier
    # head, reduced to one feature vector per image by global average pooling.
    base_model = EfficientNetB0(
        input_shape=input_shape,
        weights='imagenet',
        include_top=False,
        pooling='avg'
    )
    x = base_model(x)

    # Softmax head: one probability per class.
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    return keras.Model(inputs, outputs)

# Build the classifier for 256x256 RGB inputs.
input_shape = (256, 256, 3)
model = get_model(input_shape)

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
adam = keras.optimizers.Adam(learning_rate=0.0001)

model.compile(
    optimizer=adam,
    # The network has 37 outputs, so labels would normally need one-hot
    # encoding. sparse_categorical_crossentropy takes the integer class index
    # directly, so no one-hot conversion is required.
    loss='sparse_categorical_crossentropy',
    # Pass metrics as a list: that is the documented form, and newer Keras
    # versions reject a bare string.
    metrics=['accuracy'],
)

model.summary()

- Augmentation : 수정 필요 없음

In [None]:
class Augmentation:
    """Image augmentation pipeline that is active only in training mode.

    In ``'train'`` mode an albumentations ``Compose`` pipeline is built and
    applied on every call. In any other mode no pipeline is built and the
    image is returned unchanged.
    """

    def __init__(self, size, mode='train'):
        """Create the pipeline.

        Args:
            size: Reference image side length in pixels; each dropout hole is
                limited to 10% of it in both height and width.
            mode: ``'train'`` enables augmentation; any other value disables it.
        """
        # Always define the attribute so __call__ works in every mode
        # (previously it existed only in train mode and calling the object in
        # another mode raised AttributeError).
        self.transform = None

        if mode == 'train':
            max_hole_side = int(0.1 * size)  # 10% of the image side
            self.transform = A.Compose([
                A.HorizontalFlip(p=0.5),
                A.ShiftScaleRotate(
                    p=0.5,
                    shift_limit=0.05,  # max shift, as a fraction of the image size
                    scale_limit=0.05,  # max zoom in/out, as a fraction
                    rotate_limit=15,
                ),

                # Punch rectangular holes into the image.
                # NOTE(review): these argument names belong to the older
                # albumentations API -- confirm they are still accepted by the
                # installed version.
                A.CoarseDropout(
                    p=0.5,
                    max_holes=8,  # maximum number of holes
                    max_height=max_hole_side,
                    max_width=max_hole_side,
                ),

                A.RandomBrightnessContrast(p=0.2),
            ])

    def __call__(self, **kwargs):
        """Return the (possibly augmented) image passed as keyword ``image``.

        Args:
            **kwargs: Forwarded to the albumentations pipeline; must contain
                the key ``image``.

        Returns:
            The augmented image in train mode, otherwise the input image
            unchanged.
        """
        if self.transform is None:
            return kwargs['image']
        return self.transform(**kwargs)['image']

- DataGenerator 일부 수정 필요 : label을 return 하는 부분

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` yielding ``(images, labels)`` batches for one fold split.

    The CSV at ``csv_path`` must provide the columns ``fold``, ``file_name``
    and ``id``. Images are read from ``image_dir`` as ``<file_name>.jpg``,
    converted to RGB, resized to ``image_size`` x ``image_size``, augmented in
    train mode and scaled to [0, 1]. Labels are ``id - 1`` so that class ids
    starting at 1 become zero-based indices, as required by
    sparse_categorical_crossentropy (no one-hot encoding needed).
    """

    # File names treated as invalid and dropped from every split.
    # See https://github.com/tensorflow/models/issues/3134
    INVALID_FILENAMES = (
        'Egyptian_Mau_14',
        'Egyptian_Mau_139',
        'Egyptian_Mau_145',
        'Egyptian_Mau_156',
        'Egyptian_Mau_167',
        'Egyptian_Mau_177',
        'Egyptian_Mau_186',
        'Egyptian_Mau_191',
        'Abyssinian_5',
        'Abyssinian_34',
        'chihuahua_121',
        'beagle_116',
    )

    def __init__(self, batch_size, csv_path, image_size,
                 fold, mode='train', shuffle=True,
                 image_dir='drive/MyDrive/data/images/'):
        """Load the split table and prepare the first epoch.

        Args:
            batch_size: Number of samples per batch.
            csv_path: Path of the CSV file describing the k-fold split.
            image_size: Side length images are resized to.
            fold: Fold number held out for validation.
            mode: ``'train'`` keeps rows whose fold differs from ``fold``,
                ``'val'`` keeps rows whose fold equals ``fold``; any other
                value keeps all rows.
            shuffle: Whether to reshuffle the rows at the end of every epoch
                (and once at construction).
            image_dir: Directory containing the ``.jpg`` images.
        """
        # Newer Keras versions expect Sequence subclasses to run the base
        # constructor; on older versions this is a harmless no-op.
        super().__init__()

        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.fold = fold
        self.mode = mode
        self.image_dir = image_dir

        frame = pd.read_csv(csv_path)
        if mode == 'train':
            frame = frame[frame['fold'] != fold]
        elif mode == 'val':
            frame = frame[frame['fold'] == fold]
        self.df = frame[~frame['file_name'].isin(self.INVALID_FILENAMES)]

        self.transform = Augmentation(image_size, mode)

        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (last batch may be short)."""
        return math.ceil(len(self.df) / self.batch_size)

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(images, labels)`` NumPy arrays."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x, batch_y = self.get_data(self.df.iloc[start:stop])
        return np.array(batch_x), np.array(batch_y)

    def _load_image(self, file_name):
        """Read ``<image_dir>/<file_name>.jpg`` as a resized RGB array.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file.
        """
        path = os.path.join(self.image_dir, f'{file_name}.jpg')
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(f'Could not read image file: {path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return cv2.resize(image, (self.image_size, self.image_size))

    def get_data(self, data):
        """Turn the rows of ``data`` into lists of images and labels.

        Args:
            data: DataFrame slice with ``file_name`` and ``id`` columns.

        Returns:
            ``(batch_x, batch_y)``: float32 images in [0, 1] and zero-based
            integer labels, in row order.
        """
        batch_x = []
        batch_y = []

        for file_name, class_id in zip(data['file_name'], data['id']):
            image = self._load_image(file_name)

            # Augment only while training.
            if self.mode == 'train':
                image = self.transform(image=image)

            image = image.astype('float32') / 255.

            batch_x.append(image)
            # Shift ids so the first class becomes index 0.
            batch_y.append(int(class_id) - 1)

        return batch_x, batch_y

    def on_epoch_end(self):
        """Reshuffle the rows after each epoch when shuffling is enabled."""
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)

In [None]:
csv_path = drive_project_root + 'kfolds.csv'

# Settings shared by the training and validation generators.
generator_options = dict(
    fold=1,
    csv_path=csv_path,
    batch_size=128,
    image_size=256,
    shuffle=True,
)

# Fold 1 is held out: the training generator uses every other fold and the
# validation generator uses fold 1 only.
train_generator = DataGenerator(mode='train', **generator_options)
valid_generator = DataGenerator(mode='val', **generator_options)

- 10 epoch로 학습했을 때 이진분류 때보다는 성능이 약간 낮음
- train과 validation 사이 차이도 커짐
- 그래도 매 epoch마다 train, validation 모두 accuracy가 증가하고 있기 때문에 epoch 개수를 늘리면 성능이 더 좋아질 가능성이 큼

In [None]:
# Train for 10 epochs and evaluate on the validation generator after each one.
fit_options = {
    'validation_data': valid_generator,
    'epochs': 10,
    'verbose': 1,
}
history = model.fit(train_generator, **fit_options)

In [None]:
# model.fit returns a History object whose `.history` attribute holds the
# per-epoch metrics. Unwrap it only if that has not happened yet, so this cell
# can be re-run without failing on the already-unwrapped dict.
history = getattr(history, 'history', history)

# Left: loss curves. Right: accuracy curves. Each shows train vs. validation.
plt.figure(figsize=(15, 5))
for position, (metric, title) in enumerate(
        [('loss', 'Loss'), ('accuracy', 'Accuracy')], start=1):
    plt.subplot(1, 2, position)
    plt.plot(history[metric], label='train')
    plt.plot(history[f'val_{metric}'], label='val')
    plt.legend()
    plt.xlabel('epoch')
    plt.ylabel(metric)
    plt.title(title)
plt.show()
