### data augmentation(데이터 증강)

- 데이터가 없으면 pretrained 이용
- 부족한 데이터에 대한 해법 => augmentation

- 일반화 성능 향상

In [None]:
# 증강전 모델학습
import os
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, optimizers

import math
import numpy as np
from glob import glob
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt

# Image geometry shared by the notebook: images are loaded with
# target_size=(IMG_ROWS, IMG_COLS) and the network input shape is
# (IMG_ROWS, IMG_COLS, IMG_CHANNELS).
IMG_CHANNELS = 3
IMG_ROWS = 256
IMG_COLS = 256

#define the convnet
def build(input_shape, classes):
    """Assemble the (uncompiled) convolutional classifier.

    The network is three identical stages of Conv2D(relu) -> 3x3 max-pool ->
    Dropout(0.3), followed by Flatten, a 32-unit relu Dense layer and a
    softmax Dense layer with one output per class.

    Args:
        input_shape: shape passed as ``input_shape`` to the first
            convolution layer.
        classes: number of units of the final softmax layer.

    Returns:
        The ``Sequential`` model; the caller is responsible for compiling it.
    """
    # (filters, kernel_size) of each convolution stage, in network order.
    conv_stages = ((256, (3, 3)), (128, (6, 6)), (64, (6, 6)))

    net = models.Sequential()
    for stage_no, (n_filters, kernel) in enumerate(conv_stages):
        # Only the very first layer declares the input shape.
        first_layer_args = {'input_shape': input_shape} if stage_no == 0 else {}
        net.add(layers.Convolution2D(n_filters, kernel, activation='relu',
                                     **first_layer_args))
        net.add(layers.MaxPooling2D(pool_size=(3, 3)))
        net.add(layers.Dropout(0.3))

    # Classification head.
    net.add(layers.Flatten())
    net.add(layers.Dense(32, activation='relu'))
    net.add(layers.Dense(classes, activation='softmax'))
    return net

In [None]:
# Load the original dataset: every sub-directory of `folder_name` is one
# class, and every readable image inside it becomes one sample.
folder_name = '../data/data_mw/'

# os.listdir() returns entries in arbitrary order, so sort them to make the
# label index of each class reproducible. Keep directories only: a stray
# file (e.g. .DS_Store) would otherwise be counted as a class.
all_folders = sorted(
    entry for entry in os.listdir(folder_name)
    if os.path.isdir(folder_name + entry)
)

CLASSES = len(all_folders)  # number of class sub-directories

image_vector = []
labels_vector = []
for label, foldername in enumerate(all_folders):
    forder_list = os.listdir(folder_name + foldername)
    for filename in forder_list:
        file_path = folder_name + foldername + '/' + filename
        print(file_path)
        try:
            img = image.load_img(file_path, target_size=(IMG_ROWS, IMG_COLS))
            img_array = image.img_to_array(img)
        except Exception as err:
            # Loading stays best-effort (unreadable / non-image files are
            # skipped), but the failure is reported instead of being hidden,
            # and KeyboardInterrupt is no longer swallowed.
            print(f'skipped {file_path}: {err!r}')
            continue
        image_vector.append(img_array)
        labels_vector.append(label)

# Convert the Python lists to numpy arrays
image_vector = np.array(image_vector)
labels_vector = np.array(labels_vector)

y = labels_vector

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as a test set; the fixed random_state keeps
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    image_vector,
    y,
    test_size=0.20,
    random_state=0,
)

In [None]:
# Divide the pixel values by 255 (8-bit intensities then land in [0, 1]).
X_train = X_train / 255
X_test = X_test / 255

# One-hot encode the integer class labels into CLASSES-wide binary vectors.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, CLASSES)
    for labels in (y_train, y_test)
)

In [None]:
# Instantiate the CNN for the configured image geometry and class count,
# then print its layer summary.
model = build(
    input_shape=(IMG_ROWS, IMG_COLS, IMG_CHANNELS),
    classes=CLASSES,
)
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once the validation loss stops improving.
# `min_delta` had been merged into a trailing comment and was therefore never
# passed to the callback; it is an explicit argument again.
early_stopping = EarlyStopping(
    monitor='val_loss',  # judge progress by the validation loss
    min_delta=0.001,     # changes smaller than 0.001 do not count as improvement
    patience=10,         # stop after 10 epochs without improvement
    verbose=1,           # print a message when training is stopped
    mode='auto',         # infer min/max direction from the monitored quantity
)

model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.RMSprop(learning_rate=0.001),
    metrics=['accuracy'],
)

# 10% of the training data is held back for validation; the epoch count is an
# upper bound because early stopping may end training sooner.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.1,
    callbacks=[early_stopping],
)

In [None]:
import matplotlib.pyplot as plt
# Training vs. validation loss per epoch.
for curve_key in ('loss', 'val_loss'):
    plt.plot(history.history[curve_key])
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'val'])
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
for curve_key in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[curve_key])
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'val'])
plt.show()

In [None]:
model.save('model_man_woman.h5')  # save in the H5 file format

In [None]:
from tensorflow.keras.models import load_model

# Load the saved model file into a separate model object
model_saved = load_model('model_man_woman.h5')

In [None]:
# Load the second dataset (data_mw_add): every sub-directory of `folder_name`
# is one class, and every readable image inside it becomes one sample.
folder_name = '../data/data_mw_add/'

# os.listdir() returns entries in arbitrary order, so sort them to make the
# label index of each class reproducible. Keep directories only: a stray
# file (e.g. .DS_Store) would otherwise be counted as a class.
all_folders = sorted(
    entry for entry in os.listdir(folder_name)
    if os.path.isdir(folder_name + entry)
)

CLASSES = len(all_folders)  # number of class sub-directories

image_vector_2 = []
labels_vector_2 = []
for label, foldername in enumerate(all_folders):
    forder_list = os.listdir(folder_name + foldername)
    for filename in forder_list:
        file_path = folder_name + foldername + '/' + filename
        print(file_path)
        try:
            img = image.load_img(file_path, target_size=(IMG_ROWS, IMG_COLS))
            img_array = image.img_to_array(img)
        except Exception as err:
            # Loading stays best-effort (unreadable / non-image files are
            # skipped), but the failure is reported instead of being hidden,
            # and KeyboardInterrupt is no longer swallowed.
            print(f'skipped {file_path}: {err!r}')
            continue
        image_vector_2.append(img_array)
        labels_vector_2.append(label)

# Convert the Python lists to numpy arrays
image_vector_2 = np.array(image_vector_2)
labels_vector_2 = np.array(labels_vector_2)

y_2 = labels_vector_2

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the second dataset as a test set; the fixed random_state
# keeps the split reproducible.
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    image_vector_2,
    y_2,
    test_size=0.20,
    random_state=0,
)

In [None]:
# Divide the pixel values by 255 (8-bit intensities then land in [0, 1]).
X_train_2 = X_train_2 / 255
X_test_2 = X_test_2 / 255

# One-hot encode the integer class labels into CLASSES-wide binary vectors.
y_train_2, y_test_2 = (
    tf.keras.utils.to_categorical(labels, CLASSES)
    for labels in (y_train_2, y_test_2)
)

In [None]:
# Display the first sample of the original training split.
plt.imshow(X_train[0])

In [None]:
# Display the first sample of the second (data_mw_add) training split.
plt.imshow(X_train_2[0])

In [None]:
# Continue training the reloaded model on the data_mw_add training split.
# NOTE(review): there is no compile() call here — presumably load_model()
# restored the compiled loss/optimizer/metrics from the .h5 file; confirm.
history = model_saved.fit(
    X_train_2,
    y_train_2,
    batch_size=32,
    epochs=1000,
    validation_split=0.1,
    callbacks=[early_stopping],
)

In [None]:
# Compare the base model with the further-trained model on both test sets.
# Each (model, test set) pair is evaluated once and the result reused for
# both the loss and the accuracy column, instead of running every
# evaluation twice. Index 0 is the loss, index 1 the accuracy metric.
base_on_orig = model.evaluate(X_test, y_test, verbose=0)
base_on_added = model.evaluate(X_test_2, y_test_2, verbose=0)
tuned_on_orig = model_saved.evaluate(X_test, y_test, verbose=0)
tuned_on_added = model_saved.evaluate(X_test_2, y_test_2, verbose=0)

print(f'''\
# 증강 전
원본 데이터에 대한   기본 모델의 loss: {base_on_orig[0]:0.3f}\
, accu: {base_on_orig[1]:0.3f}
증강된 데이터에      기본 모델의 loss: {base_on_added[0]:0.3f}\
, accu: {base_on_added[1]:0.3f}

# 증강 후
원본 데이터에 대한/증강 훈련 모델의 loss: {tuned_on_orig[0]:0.3f}\
, accu: {tuned_on_orig[1]:0.3f}
증강된 데이터에   /증강 훈련 모델의 loss: {tuned_on_added[0]:0.3f}\
, accu: {tuned_on_added[1]:0.3f}\
''')