In [1]:
import pandas as pd
import numpy as np
import os
import cv2
import multiprocessing
from matplotlib import pyplot as plt

import keras
from keras import layers, models
from keras import Input
from keras.models import Model
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers, initializers, regularizers, metrics
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
shape_label = {'삼각형': 0, '사각형': 1, '오각형': 2, '육각형': 3, '팔각형': 4, 
               '원형': 5, '반원형': 6, '타원형': 7, '장방형': 8, '마름모형': 9, '기타': 10}
shape_label

{'삼각형': 0,
 '사각형': 1,
 '오각형': 2,
 '육각형': 3,
 '팔각형': 4,
 '원형': 5,
 '반원형': 6,
 '타원형': 7,
 '장방형': 8,
 '마름모형': 9,
 '기타': 10}

In [3]:
DATA_PATH = '../data/mask_front'
LABEL_PATH = '../label'
SAVE_PATH = '../save_model'

In [4]:
# # csv 파일 처음 만들 때
# img_list = os.listdir(DATA_PATH)
# img_list = [e.split('.')[0] for e in img_list]

# xls = pd.read_excel(os.path.join(LABEL_PATH, 'label_shape.xls'))
# df = pd.DataFrame(xls)
# df2 = df[df.columns[-1]].map(shape_label)
# df2 = pd.DataFrame(df2)
# df['shape'] = df2['shape']
# df2 = df[df['No'].isin(img_list)]
# df2 = df2.drop_duplicates(['No'], keep='first')
# df2 = df2.reset_index()
# for i in range(len(df2)):
# #     df2['shape'][i] = str(df2['shape'][i])
#     df2['No'][i] = str(df2['No'][i]) + '.jpg'
#     if i % 100 == 0:
#       print(i)

# # csv 파일로 만들기
# df2.to_csv(os.path.join(LABEL_PATH,'shape_test.csv'), mode='w')
# dataset = df2
# dataset['shape'] = dataset['shape'].apply(str)

In [5]:
# csv 파일이 이미 있을 때
dataset = pd.read_csv(os.path.join(LABEL_PATH,'shape.csv'))
dataset['shape'] = dataset['shape'].apply(str)

In [6]:
from sklearn.model_selection import train_test_split

dataset = dataset[['No', 'shape']]
its = np.arange(dataset.shape[0])
train_idx, val_idx = train_test_split(its, train_size = 0.8, random_state=42)

X_train = dataset.iloc[train_idx, :]
X_val = dataset.iloc[val_idx, :]

print(X_train.shape)
print(X_val.shape)

(16767, 2)
(4192, 2)


In [7]:
params = {
#     'rotation_range': 10,
#     # 좌우로 이동할 최대 비율
#     'width_shift_range': 0.20,
#     # 상하로 이동할 최대 비율
#     'height_shift_range': 0.20,
#     # 회전 및 밀림 값의 최대 라디안
#     'shear_range': 0.10,
#     # 축소/확대 할 최대 비율
#     'zoom_range': 0.20,
#     'horizontal_flip': True,
#     'brightness_range': (0.7, 1.5),
    ##########################
    'img_size': (224, 224),
    'input_shape': (224, 224, 3),
    'nb_train_samples': len(X_train),
    'nb_validation_samples': len(X_val),
#     'img_size': (299, 299),
#     'input_shape': (299, 299, 3),
    'batch_size': 24,
    'epochs': 10,
    'nb_workers': multiprocessing.cpu_count()
}

In [8]:
def get_steps(num_samples, batch_size):
    if (num_samples % batch_size) > 0 :
        return (num_samples // batch_size) + 1
    else :
        return num_samples // batch_size

In [9]:
# datagen = ImageDataGenerator(
#     rotation_range=params['rotation_range'],
#     width_shift_range=params['width_shift_range'],
#     height_shift_range=params['height_shift_range'],
# #     shear_range=params['shear_range'],
#     zoom_range=params['zoom_range'],
#     horizontal_flip=params['horizontal_flip'],
#     brightness_range=params['brightness_range'],
#     rescale=1./255,
#     validation_split=0.2)

datagen=ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_generator=datagen.flow_from_dataframe(dataframe=dataset,
                                            directory=DATA_PATH,
                                            x_col='No',
                                            y_col='shape',
                                            subset='training',
                                            class_mode='categorical',
                                            target_size=params['img_size'],
                                            batch_size=params['batch_size'])

valid_generator=datagen.flow_from_dataframe(dataframe=dataset,
                                            directory=DATA_PATH,
                                            x_col='No',
                                            y_col='shape',
                                            subset='validation',
                                            class_mode='categorical',
                                            target_size=params['img_size'],
                                            batch_size=params['batch_size'])
#                                             shuffle=False)

Found 16768 validated image filenames belonging to 11 classes.
Found 4191 validated image filenames belonging to 11 classes.


In [10]:
# top-k
from functools import partial
top_2 = partial(keras.metrics.top_k_categorical_accuracy, k=2)
top_2.__name__ = 'top_2'

top_3 = partial(keras.metrics.top_k_categorical_accuracy, k=3)
top_3.__name__ = 'top_3'

top_5 = keras.metrics.top_k_categorical_accuracy
top_5.__name__ = 'top_5'

top_10 = partial(keras.metrics.top_k_categorical_accuracy, k=10)
top_10.__name__ = 'top_10'

In [11]:
from keras.applications import VGG16
cnn_model = VGG16(weights='imagenet', include_top=False, input_shape=params['input_shape'])
cnn_model.trainable = True
model = Sequential()
model.add(cnn_model)
model.add(layers.Flatten())
model.add(layers.Dense(4096, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1024, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(11, activation='softmax', kernel_initializer='he_normal'))
# model.summary()
filepath = os.path.join(SAVE_PATH, 'VGG16_shape_ep{epoch:03d}_vloss-{val_loss:.4f}_vacc-{val_acc:.4f}.h5')


checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', verbose=1, save_best_only=True)
# earlystop = EarlyStopping(monitor='val_acc', min_delta=0, patience=5, verbose=1, mode='auto')

model.compile(loss='categorical_crossentropy', optimizer=optimizers.RMSprop(lr=2e-5), metrics=['acc', top_2, top_3])
# model.compile(loss='categorical_crossentropy', optimizer=optimizers.RMSprop(lr=1e-5), metrics=['acc', 'top_k_categorical_accuracy', top_10])


history = model.fit_generator(train_generator,
                              steps_per_epoch = get_steps(params['nb_train_samples'], params['batch_size']),
                              epochs=params['epochs'],
                              validation_data=valid_generator, 
                              validation_steps = get_steps(params['nb_validation_samples'], params['batch_size']),
                              callbacks=[checkpoint],
                              workers=params['nb_workers'])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 1/10
 40/699 [>.............................] - ETA: 5:16 - loss: 0.7881 - acc: 0.7563 - top_2: 0.8802 - top_3: 0.9156

KeyboardInterrupt: 

In [None]:
dataset_test = pd.read_csv(os.path.join(LABEL_PATH,'shape_color_prediction.csv'))
dataset_test = dataset_test[['No']]

from keras.models import load_model
params.update({
    'nb_test_samples': len(dataset_test)
})

test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=dataset_test,
    directory=DATA_PATH,
    x_col='No',
    y_col=None,
    target_size=params['img_size'],
    class_mode=None,
    batch_size=params['batch_size'],
    shuffle=False)

dependencies = {
    'top_2': top_2,
    'top_3': top_3,
    'top_5': top_5
}
model = keras.models.load_model(os.path.join(SAVE_PATH, 'VGG16_shape_ep004_vloss-0.1574_vacc-0.9759.h5'), custom_objects=dependencies)

prediction = model.predict_generator(generator = test_generator,
                                     steps = get_steps(params['nb_test_samples'], params['batch_size']),
                                     verbose=1,
                                     workers=params['nb_workers'])

In [None]:
predicted_class_indices=np.argmax(prediction, axis=1)

# Generator class dictionary mapping
labels = (train_generator.class_indices)
labels = dict((v, k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

# submission = pd.read_csv(os.path.join(LABEL_PATH, 'shape_color_prediction.csv'))
# submission['shape_top1'] = predictions
# submission.to_csv(os.path.join(LABEL_PATH, 'shape_color_prediction.csv'), index=False)

In [None]:
################# 지우지말자 #################

def top_k_label(K):
    top_list = []
    predicted_class_indices=np.argmax(prediction, axis=1)
    labels = (train_generator.class_indices)
    labels = dict((v,k) for k,v in labels.items())
    class_probs = prediction
    for i, l in enumerate(predictions): # idx, label
        class_prob = class_probs[i]
        top_values = (-class_prob).argsort()[:K].tolist() # k 개까지 높은 확률 인덱스 저장
        top_list.append(top_values)
    
    top_arr = np.zeros((len(top_list), K))
    
    for i in range(len(top_list)): # key - values 값 바꾸기
        for j in range(K):
            tmp = top_list[i][j]
            top_arr[i][j] = labels[tmp]
    
    label_list = top_arr.tolist()
    return label_list


# top_k_label(3)

In [None]:
# # top-k csv파일에 저장하기

# df = pd.read_csv(os.path.join(LABEL_PATH,'shape_color_prediction.csv'))
# df['shape_top3'] = top_k_label(3)
# df.to_csv(os.path.join(LABEL_PATH, 'shape_color_prediction.csv'), index=False)
# df.head()