In [None]:
# 모듈 로딩
import numpy as np
import pandas as pd
# import seaborn as sns
# import urllib.request
import random
import os
import cv2
import matplotlib.pyplot as plt
# plt.rcParams['font.family'] = 'Malgun Gothic'
# import warnings
# warnings.filterwarnings(action='ignore'

import tensorflow as tf
from keras.models import Sequential, Model, save_model #, load_model
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import to_categorical, plot_model, set_random_seed

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [None]:
# Fix the random seeds so repeated runs are reproducible
SEED = 42
np.random.seed(SEED)      # NumPy global RNG
tf.random.set_seed(SEED)  # TensorFlow global RNG
set_random_seed(SEED)     # Keras seeding helper

In [None]:
# Load the train and test tables from CSV
TRAIN_CSV_PATH = '/tour_data//train.csv'
TEST_CSV_PATH = '/tour_data/test.csv'
train = pd.read_csv(TRAIN_CSV_PATH)
test = pd.read_csv(TEST_CSV_PATH)

## [1] 데이터 살펴보기

In [None]:
# Preview the first rows of the training table
train.head()

In [None]:
# Count missing values in each column
train.isnull().sum()

In [None]:
# Count fully duplicated rows (this only reports the number; no rows are removed here)
train.duplicated().sum()

In [None]:
# Inspect the large / medium / small category columns:
# first the number of distinct values per level, then the per-value frequencies
category_columns = ['cat1', 'cat2', 'cat3']
print(*(train[col].nunique() for col in category_columns))
for col in category_columns[:-1]:
    print(train[col].value_counts(), '\n')
print(train[category_columns[-1]].value_counts())

## [1-2] 이미지 데이터(train) 불러오기

In [None]:
# Directory that the training images are read from
IMG_PATH='/tour_data//image/train/'
# Names of the entries in that directory; os.listdir returns them in arbitrary order
img_path_list=os.listdir(IMG_PATH)

In [None]:
# Spatial size every image is resized to; kept at 300 x 200 so the resulting
# arrays are (300, 200, 3), the shape the model's input_shape expects.
IMG_HEIGHT, IMG_WIDTH = 300, 200

img_list=[]
file_list=[]

# os.listdir gives no ordering guarantee, so iterate in sorted order to make the
# image order deterministic across runs and machines.
# NOTE(review): the images are later paired with the rows of `train` purely by
# position -- confirm that the sorted file names follow the same order as the CSV rows.
for file in sorted(img_path_list):
    im = plt.imread(IMG_PATH + file)
    if im.ndim == 2:
        # Grayscale file: expand to three channels so every sample has the same shape
        im = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)
    # cv2.resize takes dsize as (width, height) and returns (height, width, channels)
    im = cv2.resize(im, (IMG_WIDTH, IMG_HEIGHT))
    # Add a leading batch axis rather than reshaping: reshaping a (200, 300, 3)
    # array to (300, 200, 3) reinterprets the buffer and scrambles the pixel rows.
    img_list.append(im[np.newaxis, ...])
    # Keep the file name without its extension
    file_list.append(os.path.splitext(file)[0])

In [None]:
# Show the first five collected file names (extension stripped)
file_list[:5]

In [None]:
# Join the per-image arrays along the first axis into a single array
train_img_data=np.concatenate(img_list)

In [None]:
# Inspect the shape of the combined image array
train_img_data.shape

## [2] 전처리

In [None]:
# 라벨 인코더
le = LabelEncoder()
target1 = le.fit_transform(train.cat1)
target2 = le.fit_transform(train.cat2)
target3 = le.fit_transform(train.cat3)

In [None]:
# Show the shapes of the three encoded label arrays
target1.shape, target2.shape, target3.shape

In [None]:
# Both splits share the same hold-out fraction and seed
split_options = dict(test_size=0.2, random_state=42)

# First split: hold out a test set, stratified on the cat2 labels
X_train, X_test, y_train, y_test = train_test_split(
    train_img_data, target2, stratify=target2, **split_options)

# Second split: carve a validation set out of the remaining training data
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, stratify=y_train, **split_options)

In [None]:
# Show the shapes of the train / test / validation image arrays
X_train.shape, X_test.shape, X_val.shape

## [3] 모델 구상

In [None]:
# Build the CNN classifier as one ordered list of layers
model = Sequential([
    # Two stacked 3x3 convolutions; the first declares the input as 300 x 200 with 3 channels
    Conv2D(32, kernel_size=3, activation='relu', input_shape=(300, 200, 3)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),   # 2x2 max pooling
    Dropout(0.25),                    # dropout against overfitting
    Flatten(),                        # flatten the feature maps for the dense layers
    Dense(128, activation='relu'),
    Dropout(0.5),                     # dropout against overfitting
    # Output layer: 18 softmax units
    # (assumes the cat2 target has 18 classes -- TODO confirm)
    Dense(18, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# Draw the model architecture diagram, including tensor shapes
plot_model(model, show_shapes=True)

## [3-2] 모델 생성

In [None]:
# Configure training: sparse categorical cross-entropy loss (labels are the integer
# codes from the label encoder), Adam optimizer, accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Training hyper-parameters
Epochs = 15
Batch_size = 64

# File the checkpoint callback writes the model to
MODEL_PATH = 'tour_model_cat2.h5'

# Stop training once val_loss has failed to improve for 5 epochs
stopCB = EarlyStopping(patience=5, monitor='val_loss')
# Write a checkpoint only when the monitored metric improves
cpCB = ModelCheckpoint(filepath=MODEL_PATH, save_best_only=True, verbose=1)

## [3-3] 모델 학습

In [None]:
# Train the model, evaluating on the validation split each epoch;
# early stopping and checkpointing run as callbacks
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=Batch_size,
    epochs=Epochs,
    callbacks=[stopCB, cpCB],
)

## [4] 모델평가

In [None]:
def plot_accuracy_and_loss(train_model):
    """Plot training vs. validation accuracy and loss side by side.

    Parameters
    ----------
    train_model : object
        Object whose ``history`` attribute is a mapping containing the keys
        'accuracy', 'val_accuracy', 'loss' and 'val_loss', each a sequence
        with one value per epoch.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    metrics = train_model.history
    # Read every series up front so a missing key fails before any figure is created
    train_acc, valid_acc = metrics['accuracy'], metrics['val_accuracy']
    train_loss, valid_loss = metrics['loss'], metrics['val_loss']
    epoch_axis = range(len(train_acc))

    _, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(20, 8))

    # (axis, training series, validation series, labels, title, legend kwargs)
    panels = (
        (acc_ax, train_acc, valid_acc,
         'Training accuracy', 'Validation accuracy',
         'Training and validation accuracy', {'fontsize': 10}),
        (loss_ax, train_loss, valid_loss,
         'Training loss', 'Validation loss',
         'Training and validation loss', {}),
    )
    for axis, train_series, valid_series, train_label, valid_label, title, legend_kwargs in panels:
        axis.plot(epoch_axis, train_series, 'g', label=train_label)
        axis.plot(epoch_axis, valid_series, 'r', label=valid_label)
        axis.set_title(title)
        axis.legend(**legend_kwargs)

    plt.show()
# Plot the accuracy and loss curves recorded in `history`
plot_accuracy_and_loss(history)

## [5] 예측