In [None]:
# 추가 고려 사항
# 모델에 metrics -> f1_score 를 넣을수는 없는가? 
# metrics=['accuracy', precision, recall, f1score]

In [None]:
# library load
import gc             # garbage collection library
import os             # 기본적인 환경을 setting 할 수 있는 library(ex) dir 등) 
import warnings       # warnings을 무시하기 위해 일반적으로 사용되는 library
import numpy as np    # numpy package : array, 배열 연산, slicing 에 자주 사용되는 library
import pandas as pd   # 주로 분석 모델에 사용하는 package
from tqdm import tqdm # 진행바를 만들어주는 library
from sklearn.model_selection import StratifiedKFold, KFold # K-fold 검증

from keras import backend as K # 딥러닝 모델 변수들의 초기값을 setting해 주기 위한 library
from keras.preprocessing.image import ImageDataGenerator # ImageDataGenerator

from keras.applications import Xception                                        # Xception pretrained Model 사용
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint  # callback 도구 : EarlyStopping, ReduceOnPlateau, ModelCheckPoint
from keras import layers, models, optimizers

# Suppress every warning category for the remainder of the notebook.
warnings.filterwarnings('ignore')

# Last expression of the cell: echoes the backend's configured image data format.
K.image_data_format()

In [None]:
# List the entries of the cropped-image dataset directory.
# Alternative location without the `input/` prefix, kept for reference:
# os.listdir("../3rd-ml-month-car-image-cropping-dataset/")  
os.listdir("../input/3rd-ml-month-car-image-cropping-dataset/")  

In [None]:
# Dataset locations: the competition CSV files and the pre-cropped image dataset.
DATA_PATH         = '../input/2019-3rd-ml-month-with-kakr/'
DATA_CROPPED_PATH = '../input/3rd-ml-month-car-image-cropping-dataset/'

# A cell only renders its last expression, so the directory listing has to be
# printed explicitly to be visible.
print(os.listdir(DATA_PATH))

# Image folders: the cropped images are used for both train and test.
TRAIN_IMG_PATH = os.path.join(DATA_CROPPED_PATH, 'train_crop')
TEST_IMG_PATH  = os.path.join(DATA_CROPPED_PATH, 'test_crop')

# Read the train, test and class tables from the CSV files.
df_train = pd.read_csv(os.path.join(DATA_PATH, 'train.csv'))
df_test  = pd.read_csv(os.path.join(DATA_PATH, 'test.csv'))
df_class = pd.read_csv(os.path.join(DATA_PATH, 'class.csv'))

# Preview both frames (previously only the test preview was rendered because
# the train preview was not the last expression). The test preview is there to
# check that the test table carries no class column.
# `display` is the rich-output helper the IPython/Jupyter kernel provides.
display(df_train.head(), df_test.head())

In [None]:
# 지표 선택
# 이번 컴페티션에서 f1_score가 지표.
# f1_score를 내가 만들고자 하는 모델에 집어 넣을 수 있는가?
from sklearn.metrics import f1_score
def micro_f1(y_true, y_pred):
    """Return the micro-averaged F1 score of ``y_pred`` measured against ``y_true``.

    Thin wrapper that forwards both arguments unchanged to ``f1_score`` with
    ``average='micro'``.

    NOTE(review): presumably both arguments must already be in a form
    ``f1_score`` accepts (e.g. class labels rather than Keras tensors) --
    confirm before passing this function as a Keras metric.
    """
    score = f1_score(y_true, y_pred, average='micro')
    return score

In [None]:
# Build the ImageDataGenerators for the train / validation / test images
# (adapted from Jang's kernel).

# Every generator multiplies pixel values by this factor.
_pixel_scale = 1./255

# Random augmentation is applied to the training images only.
# Dataset-wise / sample-wise centering, std normalization and brightness_range
# are deliberately left at their defaults (they were disabled in this notebook).
_train_augmentation = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    zoom_range=0.3,
    shear_range=0.3,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=_pixel_scale, **_train_augmentation)

# Validation and test images are only rescaled, never augmented.
valid_datagen = ImageDataGenerator(rescale=_pixel_scale)
test_datagen = ImageDataGenerator(rescale=_pixel_scale)

In [None]:
def call_back(model_name, patient):
    """Build the list of Keras callbacks used while training one model.

    All three callbacks monitor ``val_loss`` in ``'min'`` mode.

    Args:
        model_name: File path ModelCheckpoint writes to; only the best
            epoch (lowest ``val_loss``) is kept.
        patient: Number of epochs without improvement after which
            EarlyStopping ends training. ReduceLROnPlateau uses half of this
            value (rounded up).

    Returns:
        list: ``[EarlyStopping, ReduceLROnPlateau, ModelCheckpoint]``.
    """
    # `patience` is documented as an integer number of epochs; the previous
    # `patient / 2` handed Keras a float. Rounding up keeps the epoch at which
    # the learning rate is reduced the same as with the float value.
    lr_patience = (int(patient) + 1) // 2

    ES = EarlyStopping(
        monitor='val_loss',
        patience=patient,
        mode='min',
        verbose=1)
    RR = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,          # halve the learning rate on each plateau
        patience=lr_patience,
        min_lr=0.000001,     # never reduce below this learning rate
        verbose=1,
        mode='min')
    MC = ModelCheckpoint(
        filepath=model_name,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min')

    return [ES, RR, MC]

In [None]:
# 모델 층 설계
# Model architecture
def get_model(model_name, iamge_size, num_classes=196, learning_rate=0.0001):
    """Build and compile a classifier on top of a pretrained backbone.

    The backbone is instantiated with ImageNet weights and without its top
    layers, followed by global average pooling, two Dense(1024, relu) +
    Dropout(0.25) stages and a softmax output layer. The model summary is
    printed as a side effect.

    Args:
        model_name: Callable that builds the backbone and accepts ``weights``,
            ``input_shape`` and ``include_top`` (this notebook passes
            ``Xception``).
        iamge_size: Height and width of the square RGB input. The misspelled
            name is kept so keyword callers keep working.
        num_classes: Number of units in the softmax output layer
            (default 196, the value previously hard-coded).
        learning_rate: Learning rate for the RMSprop optimizer
            (default 0.0001, the value previously hard-coded).

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        ``'acc'`` metric).
    """
    base_model = model_name(weights     = 'imagenet',
                            input_shape = (iamge_size, iamge_size, 3),
                            include_top = False
                           )

    model = models.Sequential()
    model.add(base_model)
    model.add(layers.GlobalAveragePooling2D())

    # Two identical fully connected stages in front of the classifier.
    for _ in range(2):
        model.add(layers.Dense(1024, activation='relu'))
        model.add(layers.Dropout(0.25))

    model.add(layers.Dense(num_classes, activation='softmax'))
    model.summary()

    # The `lr` keyword is kept as in the original call to stay compatible with
    # the Keras version this notebook targets.
    optimizer = optimizers.RMSprop(lr=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics= ['acc'] )

    return model
    
    

In [None]:
# Cast the class label to string and keep only the columns used downstream:
# file name + label for training, file name only for test.
# NOTE(review): presumably the string cast is what flow_from_dataframe needs
# for class_mode='categorical' -- confirm against the Keras version in use.
df_train = df_train.assign(**{'class': df_train['class'].astype('str')})[['img_file', 'class']]
df_test  = df_test.loc[:, ['img_file']]

In [None]:
# Validation strategy: stratified k-fold cross-validation.
# If the run exceeds 9 hours another approach is needed:
# -> train each model in its own kernel and average the results for submission.

# Create the scikit-learn StratifiedKFold splitter with k = 5.
# NOTE(review): `K` rebinds the name introduced by `from keras import backend as K`,
# so the backend alias is unusable after this cell. The name is kept so existing
# references to `K` as the fold count keep working; rename it together with them.
K          = 5
IMAGE_SIZE = 299 # considered the best input size for the Xception model
BATCH_SIZE = 32
EPOCH      = 20
model_path = './'
model_xception_names = []

# shuffle=True is required for random_state to have any effect: without it
# older scikit-learn ignores the seed and newer releases raise a ValueError.
skf = StratifiedKFold(n_splits = K, shuffle = True, random_state = 2019)

In [None]:
### 모델 수행 : Xception Model

# Train one Xception model per cross-validation fold and remember where each
# fold's checkpoint is written.
model_xception_names = []

# Ask the splitter for the fold count instead of relying on a separate global.
n_folds = skf.get_n_splits()

for fold, (train_index, valid_index) in enumerate(
        skf.split(df_train['img_file'], df_train['class']),
        start=1):

    # drop=True: do not carry the old row labels along as an extra 'index' column.
    traindf = df_train.iloc[train_index, :].reset_index(drop=True)
    validdf = df_train.iloc[valid_index, :].reset_index(drop=True)

    print("=========================================")
    print("====== K Fold Validation step => %d/%d =======" % (fold, n_folds))
    print("=========================================")

    train_generator = train_datagen.flow_from_dataframe(
        dataframe   = traindf,
        directory   = TRAIN_IMG_PATH,
        x_col       = 'img_file',
        y_col       = 'class',
        target_size = (IMAGE_SIZE, IMAGE_SIZE),
        color_mode  = 'rgb',
        class_mode  = 'categorical',
        batch_size  =  BATCH_SIZE,
        seed        =  2019,
        shuffle     = True
        )

    valid_generator = valid_datagen.flow_from_dataframe(
        dataframe   = validdf,
        directory   = TRAIN_IMG_PATH,
        x_col       = 'img_file',
        y_col       = 'class',
        target_size = (IMAGE_SIZE, IMAGE_SIZE),
        color_mode  = 'rgb',
        class_mode  ='categorical',
        batch_size  = BATCH_SIZE,
        seed        = 2019,
        shuffle     = True
        )

    model_name = model_path + str(fold) + '_xception.hdf5'
    model_xception_names.append(model_name)
    model_xception = get_model(Xception, IMAGE_SIZE)

    # Resume from an existing checkpoint when there is one. Loading stays
    # best-effort (an unreadable file means training starts from the ImageNet
    # weights), but the failure is reported instead of silently swallowed and
    # KeyboardInterrupt is no longer caught.
    if os.path.exists(model_name):
        try:
            model_xception.load_weights(model_name)
        except Exception as err:
            print("Could not load weights from %s (%s); training from scratch."
                  % (model_name, err))

    # len(generator) is the integer number of batches per pass; the previous
    # `len(df) / BATCH_SIZE` passed a float step count to Keras.
    history = model_xception.fit_generator(
        train_generator,
        steps_per_epoch  = len(train_generator),
        epochs           = EPOCH,
        validation_data  = valid_generator,
        validation_steps = len(valid_generator),
        verbose          = 1,
        shuffle          = False,
        callbacks        = call_back( model_name, 6)
        )

In [None]:
# 모델 평가
# xception_prediction  = [] # 예측 값을 저장할 list 선언
# model_xception_names = [] # 모델 경로 + 이름을 저장할 list 선언
# for i in range(1, 4):
#     model_xception_names.append(model_path + str(i) + '_xception.hdf5')

# test_generator 생성
# Generator over the test images: no label column, and shuffling disabled so
# the batches follow the row order of df_test.
_test_flow_options = dict(
    x_col='img_file',
    y_col=None,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    color_mode='rgb',
    class_mode=None,
    batch_size=BATCH_SIZE,
    shuffle=False,
)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    directory=TEST_IMG_PATH,
    **_test_flow_options
)

In [None]:
# Predict the test set with every fold's checkpoint and average the
# per-class outputs across folds.
# The accumulator has to be created here: it was never defined in an active
# cell, so the append below raised NameError on a fresh kernel.
xception_prediction = []

for name in model_xception_names:
    model_xception = get_model(Xception, IMAGE_SIZE)
    model_xception.load_weights(name)

    # Restart the generator so every model sees the images in the same order.
    test_generator.reset()
    pred = model_xception.predict_generator(
        generator = test_generator,
        steps     = len(test_generator),  # integer batch count instead of a float
        verbose   = 1
    )
    xception_prediction.append(pred)

if not xception_prediction:
    raise RuntimeError("No trained models listed in model_xception_names; nothing to average.")

y_pred_xception = np.mean(xception_prediction, axis=0)

In [None]:
# For every row of the averaged predictions, take the column index of the largest value.
preds_class_indices = y_pred_xception.argmax(axis=1)

In [None]:
# Invert the generator's label -> index mapping, then translate each
# predicted index back to its class label.
labels = {index: label for label, index in train_generator.class_indices.items()}
final_pred = list(map(labels.__getitem__, preds_class_indices))

In [None]:
# Fill the sample submission's "class" column with the predicted labels,
# write it to submission.csv and preview the first rows.
submission_template = os.path.join(DATA_PATH, 'sample_submission.csv')
submission = pd.read_csv(submission_template).assign(**{"class": final_pred})
submission.to_csv("submission.csv", index=False)
submission.head()