<a href="https://colab.research.google.com/github/scv1702/eel-average-weight/blob/master/eel_average_weight.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 스마트 양식장 뱀장어 평균 중량 인식모델 개발 온라인 해커톤
스마트 양식장 수조 내 뱀장어 이미지를 인식하여 해당 수조 내 뱀장어의 평균 중량을 인식하는 모델 개발하기

## 대회 주제 및 목표
- 주제 : 스마트 양식장 수조 내 이미지를 활용하여 뱀장어의 중량을 인식하는 인공지능 모델 개발

## 주최/주관
- 주최 : 과학기술정보통신부, 한국지능정보사회진흥원(NIA), 한국판뉴딜
- 주관 : 명선해양산업 주식회사, 주식회사 아이싸이랩
- 운영 : 인공지능팩토리

작성자: 신찬규 (scv1702@naver.com)


## 구글 드라이브 마운트

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset and output
# paths used in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 모듈

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import PIL.Image as pilimg
import cv2
import pandas as pd
import os
import time
import shutil
import gc

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense , Conv2D, Activation, MaxPooling2D , GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau , EarlyStopping , ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## 환경 변수

In [None]:
# Training images (one sub-folder per sample) and the folder for resized copies.
TRAIN_PATH = "/content/drive/MyDrive/eel-average-weight/dataset/train/"
TRAIN_RESIZE_PATH = "/content/drive/MyDrive/eel-average-weight/dataset/train_resize/"

# Test images and the folder for their resized copies.
TEST_PATH = "/content/drive/MyDrive/eel-average-weight/dataset/test/"
TEST_RESIZE_PATH = "/content/drive/MyDrive/eel-average-weight/dataset/test_resize/"

# CSV holding the training labels (columns 'ImageDir' and 'AvgWeight' are read).
TRAIN_LABEL_PATH = "/content/drive/MyDrive/eel-average-weight/dataset/train.csv"

# Output folders: prediction CSV files and saved model weights.
RESULT_PATH = "/content/drive/MyDrive/eel-average-weight/result/"
MODEL_PATH = "/content/drive/MyDrive/eel-average-weight/model/"

# Side length in pixels of the square images fed to the model.
IMAGE_SIZE = 240

# Maximum number of training epochs passed to model.fit.
EPOCHS = 50

## 폴더 생성
가중치와 예측 결과를 저장할 폴더 생성

In [None]:
# Create the output folders. exist_ok=True keeps the cell safe to re-run
# (os.mkdir raised FileExistsError on the second run), and makedirs also
# creates any missing parent folders.
os.makedirs(RESULT_PATH, exist_ok=True)
os.makedirs(MODEL_PATH, exist_ok=True)

## 유틸리티

In [None]:
# delete useless weights
def delete_weight():
    """Delete every ``.hdf5`` weight file located directly in ``MODEL_PATH``.

    Only regular files whose extension is exactly ``.hdf5`` are removed.
    Names that merely contain ``.hdf5`` (for example ``x.hdf5.bak``) and
    sub-folders are left untouched.
    """
    for name in os.listdir(MODEL_PATH):
        target = os.path.join(MODEL_PATH, name)
        # Match on the real extension; a substring test would also hit
        # backups such as "weights.hdf5.bak".
        if os.path.splitext(name)[-1] == ".hdf5" and os.path.isfile(target):
            os.remove(target)

    print("delete weights complete")

# clean memory
def clean_memory():
    """Clear the Keras session state and run the garbage collector.

    Prints a confirmation together with the value returned by
    ``gc.collect()``.
    """
    tf.keras.backend.clear_session()
    collected = gc.collect()
    print("clean memory complete ", collected)

In [None]:
# Weight deletion is left disabled; uncomment the next line to also remove
# the .hdf5 files stored in MODEL_PATH before starting over.
# delete_weight()
clean_memory()

## 이미지 전처리
1920x1080 이미지를 240x240으로 크기 변경

In [None]:
# resize images to (IMAGE_SIZE, IMAGE_SIZE)
def resize_data(data):
    """Resize every ``.jpg`` of one dataset split and save the copies.

    Images are read from the split's original folder, resized to
    ``(IMAGE_SIZE, IMAGE_SIZE)`` and written to the matching resize folder,
    keeping the same sub-folder and file names.

    Args:
        data: ``"train"`` or ``"test"``; selects the source/destination pair.

    Raises:
        ValueError: if ``data`` is neither ``"train"`` nor ``"test"``.
    """
    if data == "train":
        RESIZE_PATH = TRAIN_RESIZE_PATH
        ORIGINAL_PATH = TRAIN_PATH
    elif data == "test":
        RESIZE_PATH = TEST_RESIZE_PATH
        ORIGINAL_PATH = TEST_PATH
    else:
        raise ValueError(f"data must be 'train' or 'test', got {data!r}")

    # Only sub-folders hold images; a stray file here would make the
    # os.listdir call below fail.
    dirs = [d for d in os.listdir(ORIGINAL_PATH)
            if os.path.isdir(ORIGINAL_PATH + d)]

    for i, dir_name in enumerate(dirs):
        src_dir = ORIGINAL_PATH + dir_name + "/"
        dst_dir = RESIZE_PATH + dir_name + "/"

        # cv2.imwrite does not create folders; it just returns False.
        os.makedirs(dst_dir, exist_ok=True)

        for img_name in os.listdir(src_dir):
            if os.path.splitext(img_name)[-1] != ".jpg":
                continue

            img = cv2.imread(src_dir + img_name)
            if img is None:
                # imread returns None instead of raising on unreadable files.
                print(f"skip unreadable image: {src_dir + img_name}")
                continue

            # Keep OpenCV's BGR order: imwrite expects BGR, so converting to
            # RGB first would store the file with red and blue swapped.
            resized_img = cv2.resize(img, dsize=(IMAGE_SIZE, IMAGE_SIZE))
            if not cv2.imwrite(dst_dir + img_name, resized_img):
                print(f"failed to write image: {dst_dir + img_name}")

        # Per-folder progress; also avoids a NameError when dirs is empty.
        print(f"{data} resizing [{i+1}/{len(dirs)}] complete")

In [None]:
# Write resized copies of both splits to their *_resize folders.
resize_data("train")
resize_data("test")

## ImageDataGenerator를 위한 DataFrame 생성

In [None]:
def get_tr_val_df():
    """Build the training and validation DataFrames.

    Each row holds the path of one ``.jpg`` under ``TRAIN_PATH`` and the
    ``AvgWeight`` label assigned to the folder that contains it.

    Returns:
        ``(tr_df, val_df)``: an 85 % / 15 % split of the rows, stratified on
        the label with a fixed random state.
    """
    sorted_labels = pd.read_csv(TRAIN_LABEL_PATH).sort_values(by=['ImageDir'], axis=0)
    weights = np.array(sorted_labels['AvgWeight'])

    path = []
    label = []

    # NOTE(review): labels are matched to folders purely by position (CSV rows
    # sorted by 'ImageDir' vs. sorted folder names) — this assumes both
    # orderings agree and that TRAIN_PATH holds nothing but sample folders.
    for idx, folder_name in enumerate(sorted(os.listdir(TRAIN_PATH))):
        folder = TRAIN_PATH + folder_name + "/"
        for file_name in sorted(os.listdir(folder)):
            if os.path.splitext(file_name)[-1] != ".jpg":
                continue
            path.append(folder + file_name)
            label.append(weights[idx])

    dataset_df = pd.DataFrame({'path': path, 'label': label})

    tr_df, val_df = train_test_split(
        dataset_df,
        test_size=0.15,
        stratify=dataset_df['label'],
        random_state=2022,
    )

    return tr_df, val_df

def get_test_df():
    """Build the DataFrame describing every ``.jpg`` under ``TEST_PATH``.

    Folders and files are visited in sorted order.

    Returns:
        DataFrame with columns ``path`` (full image path), ``ImageDir`` and
        ``label`` (the placeholder string ``"0"``).
    """
    path = []
    img_dir = []
    label = []

    for folder_name in sorted(os.listdir(TEST_PATH)):
        folder = TEST_PATH + folder_name + "/"
        for file_name in sorted(os.listdir(folder)):
            if os.path.splitext(file_name)[-1] != ".jpg":
                continue
            path.append(folder + file_name)
            label.append("0")
            # NOTE(review): this stores the image file name, not the folder
            # name, in the 'ImageDir' column — confirm that is intended.
            img_dir.append(file_name)

    test_df = pd.DataFrame({'path': path, 'ImageDir': img_dir, 'label': label})

    return test_df

## 모델 생성

In [None]:
def create_model(verbose=False):
    """Build and compile the convolutional regression model.

    The network is five stages of 3x3 Conv2D -> BatchNormalization -> ReLU
    units, each stage followed by 2x2 max pooling, then global average
    pooling, a 100-unit ReLU dense layer and a single linear output unit.
    It is compiled with Adam and mean-squared-error loss.

    Args:
        verbose: when true, print ``model.summary()``.

    Returns:
        The compiled Keras ``Model``.
    """
    input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))

    # (number of filters, number of conv units) for each stage, in order.
    stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    x = input_tensor
    for n_filters, n_units in stages:
        for _ in range(n_units):
            x = Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same',
                       kernel_initializer='he_normal')(x)
            x = BatchNormalization()(x)
            x = Activation('relu')(x)
        # Halve the spatial resolution at the end of every stage.
        x = MaxPooling2D(pool_size=(2, 2))(x)

    x = GlobalAveragePooling2D()(x)
    x = Dense(100, activation='relu')(x)
    output = Dense(1)(x)

    model = Model(inputs=input_tensor, outputs=output)
    model.compile(optimizer=Adam(), loss='mse')

    if verbose:
        model.summary()

    return model

In [None]:
# Build the compiled model and print its layer summary.
model = create_model(verbose=True)

## 모델 학습 수행

In [None]:
def train_model(model, verbose=True):
    """Train ``model`` on the training split and validate on the held-out split.

    Images are loaded through ``ImageDataGenerator`` flows with pixel values
    scaled by 1/255 and resized to ``(IMAGE_SIZE, IMAGE_SIZE)``. Training uses
    learning-rate reduction, early stopping and weight checkpointing, all
    monitoring ``val_loss``.

    Args:
        model: compiled Keras model to fit.
        verbose: verbosity forwarded to ``model.fit``.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    train_frame, valid_frame = get_tr_val_df()

    # Options shared by the training and validation flows.
    flow_options = dict(
        x_col='path',
        y_col='label',
        target_size=(IMAGE_SIZE, IMAGE_SIZE),
        class_mode='raw',
    )

    train_source = ImageDataGenerator(rescale=1/255.0)
    valid_source = ImageDataGenerator(rescale=1/255.0)

    # Only the training flow is shuffled; validation keeps DataFrame order.
    train_flow = train_source.flow_from_dataframe(
        dataframe=train_frame, shuffle=True, **flow_options)
    valid_flow = valid_source.flow_from_dataframe(
        dataframe=valid_frame, shuffle=False, **flow_options)

    # NOTE(review): `period` on ModelCheckpoint is deprecated in newer Keras
    # releases — confirm it is still accepted by the installed version.
    callbacks = [
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5,
                          mode='min', verbose=1),
        EarlyStopping(monitor='val_loss', patience=10, mode='min', verbose=1),
        ModelCheckpoint(
            filepath=MODEL_PATH + '/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
            monitor='val_loss',
            save_best_only=True,
            save_weights_only=True,
            mode='min',
            period=5,
            verbose=1,
        ),
    ]

    return model.fit(
        train_flow,
        epochs=EPOCHS,
        validation_data=valid_flow,
        callbacks=callbacks,
        verbose=verbose,
    )

In [None]:
# Fit the model; the returned history is plotted in the next section.
history = train_model(model)

## 학습 결과 출력

In [None]:
def show_history(history):
    """Plot the training and validation loss curves of a fit history.

    Args:
        history: object whose ``history`` dict has ``'loss'`` and
            ``'val_loss'`` entries (as returned by ``model.fit``).
    """
    plt.figure(figsize=(32, 32))

    # Fixed tick positions: y from 0 to 1 in steps of 0.05, x every 2 epochs.
    plt.yticks(np.arange(0, 1, 0.05))
    plt.xticks(np.arange(0, EPOCHS, 2))

    for metric, legend_name in (('loss', 'train'), ('val_loss', 'valid')):
        plt.plot(history.history[metric], label=legend_name)

    plt.legend()

In [None]:
# Plot the train/validation loss curves recorded during training.
show_history(history)

## 예측 수행

In [None]:
def predict(model):
    """Run ``model`` on every test image and return the predictions.

    Images are read in the order produced by ``get_test_df`` (the flow is not
    shuffled), scaled by 1/255 and resized to ``(IMAGE_SIZE, IMAGE_SIZE)``.

    Args:
        model: trained Keras model.

    Returns:
        Array of predictions in test-image order. A trailing axis of length 1
        is removed, so a single-output model yields a 1-D array.
    """
    test_df = get_test_df()

    test_gen = ImageDataGenerator(rescale=1/255.0)
    test_flow_gen = test_gen.flow_from_dataframe(dataframe=test_df,
                                                x_col='path',
                                                y_col='label',
                                                target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                                class_mode='raw',
                                                shuffle=False)

    predict_result = np.asarray(model.predict(test_flow_gen, verbose=True))

    # ndarray.squeeze() returns a new array and leaves the original untouched,
    # so its result has to be kept. Only the trailing unit axis is removed, so
    # a single-image result stays 1-D instead of collapsing to a 0-d scalar.
    if predict_result.ndim > 1 and predict_result.shape[-1] == 1:
        predict_result = predict_result.squeeze(axis=-1)

    return predict_result

In [None]:
# Predict a value for every test image, in get_test_df order.
predict_result = predict(model)

## 예측 결과를 csv 파일로 변환

In [None]:
def predict_to_csv(predict_result):
    """Average the per-image predictions of each test folder and save a CSV.

    ``predict_result`` must hold one prediction per ``.jpg`` under
    ``TEST_PATH``, ordered by sorted folder name and then sorted file name
    (the order used by ``get_test_df``). One row per folder is written with
    columns ``ImageDir`` and ``AvgWeight`` to a timestamped file in
    ``RESULT_PATH``. Folders without any ``.jpg`` are skipped.

    Args:
        predict_result: array-like of per-image predictions; any shape that
            flattens to one value per image (e.g. ``(N,)`` or ``(N, 1)``).

    Raises:
        ValueError: if the number of predictions differs from the number of
            ``.jpg`` images found under ``TEST_PATH``.
    """
    predictions = np.asarray(predict_result).reshape(-1)

    # Count images per folder with the same extension filter get_test_df
    # uses, so the slices below line up with the prediction order.
    dir_names = sorted(os.listdir(TEST_PATH))
    counts = [
        sum(1 for x in os.listdir(TEST_PATH + name + "/")
            if os.path.splitext(x)[-1] == ".jpg")
        for name in dir_names
    ]

    total = sum(counts)
    if total != len(predictions):
        raise ValueError(
            f"expected {total} predictions for the images in TEST_PATH, "
            f"got {len(predictions)}"
        )

    img_dir = []
    result = []
    start = 0

    for name, count in zip(dir_names, counts):
        if count == 0:
            # No images means no predictions to average for this folder.
            continue
        # Mean over this folder's own predictions only.
        result.append(predictions[start:start + count].mean())
        img_dir.append(name)
        start += count

    now = time.localtime()
    result_df = pd.DataFrame({'ImageDir': img_dir, 'AvgWeight': result})
    FILE_NAME = f"{now.tm_year}_{now.tm_mon}{now.tm_mday}_{now.tm_hour}_{now.tm_min}_{now.tm_sec}.csv"
    result_df.to_csv(RESULT_PATH + FILE_NAME, index=False, encoding='cp949')

    print("convert complete")

In [None]:
# Write the predictions to a timestamped CSV file in RESULT_PATH.
predict_to_csv(predict_result)

## 가중치를 MODEL_PATH 하위의 타임스탬프 폴더로 이동

In [None]:
def move_weight_dir():
    """Move the ``.hdf5`` files in ``MODEL_PATH`` into a new timestamped sub-folder.

    The sub-folder is named after the current local time and created inside
    ``MODEL_PATH``; its path is printed once the files have been moved.
    """
    now = time.localtime()
    DIR_NAME = f"{now.tm_year}_{now.tm_mon}{now.tm_mday}_{now.tm_hour}_{now.tm_min}_{now.tm_sec}/"
    destination = MODEL_PATH + DIR_NAME
    os.mkdir(destination)

    # The new sub-folder shows up in this listing too, but it has no .hdf5
    # extension, so it is never selected.
    weight_files = [
        name for name in os.listdir(MODEL_PATH)
        if os.path.splitext(name)[-1] == ".hdf5"
    ]

    for name in weight_files:
        shutil.move(MODEL_PATH + name, destination + name)

    print(f"weights are saved in {MODEL_PATH + DIR_NAME}")

In [None]:
# Archive the .hdf5 weight files into a timestamped sub-folder of MODEL_PATH.
move_weight_dir()