In [11]:
import pandas as pd
import numpy as np
import os
import cv2
import multiprocessing
from matplotlib import pyplot as plt

import keras
from keras import layers, models
from keras import Input
from keras.models import Model
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers, initializers, regularizers, metrics
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.utils import np_utils

In [12]:
# Map each Korean shape name to an integer class id (0-10). The names are, in order:
# triangle, quadrilateral, pentagon, hexagon, octagon, circle, semicircle,
# ellipse, oblong, rhombus, other. The id is simply the position in this list.
shape_label = {
    name: index
    for index, name in enumerate([
        '삼각형', '사각형', '오각형', '육각형', '팔각형',
        '원형', '반원형', '타원형', '장방형', '마름모형', '기타',
    ])
}
shape_label

{'삼각형': 0,
 '사각형': 1,
 '오각형': 2,
 '육각형': 3,
 '팔각형': 4,
 '원형': 5,
 '반원형': 6,
 '타원형': 7,
 '장방형': 8,
 '마름모형': 9,
 '기타': 10}

In [13]:
# Project directories, all relative to the notebook's working directory.
DATA_PATH = "../data/mask_front"  # image directory handed to the test generator
LABEL_PATH = "../label"           # holds shape_color_prediction.csv
SAVE_PATH = "../save_model"       # model checkpoints (.h5) are written to / loaded from here

In [14]:
# Run configuration shared by the data generators, training and prediction cells.
params = dict(
    img_size=(224, 224),        # target (height, width) passed to the generators
    input_shape=(224, 224, 3),  # input shape given to the VGG16 base
    batch_size=24,
    epochs=20,
    nb_workers=multiprocessing.cpu_count(),  # worker count for fit/predict generators
)

In [15]:
# A single ImageDataGenerator feeds both subsets: pixel values are scaled to [0, 1]
# and validation_split=0.2 reserves 20% of the images for validation.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Arguments shared by the training and validation iterators; only `subset` differs.
_flow_options = dict(
    directory='../aug_data/aug_shape/',
    class_mode='categorical',
    target_size=params['img_size'],
    batch_size=params['batch_size'],
)

train_generator = train_datagen.flow_from_directory(subset='training', **_flow_options)
valid_generator = train_datagen.flow_from_directory(subset='validation', **_flow_options)

Found 70400 images belonging to 11 classes.
Found 17600 images belonging to 11 classes.


In [16]:
# Top-k accuracy metrics.
# Each metric is a functools.partial around Keras' top_k_categorical_accuracy with a
# fixed k. Keras labels a metric by its __name__, so every partial gets an explicit one.
from functools import partial

top_2 = partial(keras.metrics.top_k_categorical_accuracy, k=2)
top_2.__name__ = 'top_2'

top_3 = partial(keras.metrics.top_k_categorical_accuracy, k=3)
top_3.__name__ = 'top_3'

# Wrap the function instead of aliasing it: setting __name__ on the alias would rename
# keras.metrics.top_k_categorical_accuracy itself for the whole process.
# k=5 is that function's default, so the computed metric is unchanged.
top_5 = partial(keras.metrics.top_k_categorical_accuracy, k=5)
top_5.__name__ = 'top_5'

top_10 = partial(keras.metrics.top_k_categorical_accuracy, k=10)
top_10.__name__ = 'top_10'

In [17]:
######################## VGG16 #########################
from keras.applications import VGG16

# ImageNet-pretrained VGG16 convolutional base without its original classifier head.
cnn_model = VGG16(include_top=False, weights='imagenet', input_shape=params['input_shape'])
cnn_model.trainable = True  # the base is fine-tuned together with the new head

# New classification head: two dropout-regularised dense layers and an 11-way softmax
# (the data generators report 11 classes).
model = Sequential()
model.add(cnn_model)
model.add(layers.Flatten())
model.add(layers.Dense(4096, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1024, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(11, activation='softmax', kernel_initializer='he_normal'))
model.summary()

# Optional: resume from a saved checkpoint instead of training the fresh model above.
# ###
# dependencies = {
#     'top_2': top_2,
#     'top_3': top_3,
#     'top_5': top_5
# }

# model = keras.models.load_model(os.path.join(SAVE_PATH, 'VGG16_shape_ep005_vloss-1.3428_vacc-0.6989.h5'), custom_objects=dependencies)
# ###

# Make sure the checkpoint directory exists before training starts, so the first
# end-of-epoch save cannot fail on a missing folder after a full epoch of work.
os.makedirs(SAVE_PATH, exist_ok=True)
filepath = os.path.join(SAVE_PATH, 'VGG16_shape_ep{epoch:03d}_vloss-{val_loss:.4f}_vacc-{val_acc:.4f}.h5')

# save_best_only=False: a checkpoint is written after every epoch, with the epoch
# number, validation loss and validation accuracy embedded in the file name.
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', verbose=1, save_best_only=False)
# earlystop = EarlyStopping(monitor='val_acc', min_delta=0, patience=5, verbose=1, mode='auto')

model.compile(loss='categorical_crossentropy', optimizer=optimizers.RMSprop(lr=2e-5), metrics=['acc', top_2, top_3])
# model.compile(loss='categorical_crossentropy', optimizer=optimizers.RMSprop(lr=2e-6), metrics=['acc', top_2, top_3])

# Floor division: each epoch runs only full batches; a trailing partial batch is not counted.
history = model.fit_generator(train_generator,
                              steps_per_epoch = train_generator.samples // params['batch_size'],
                              validation_data=valid_generator, 
                              validation_steps = valid_generator.samples // params['batch_size'],
                              epochs=params['epochs'],
                              callbacks=[checkpoint],
                              workers=params['nb_workers'])


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 4096)              102764544 
_________________________________________________________________
dropout_1 (Dropout)          (None, 4096)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              4195328   
_________________________________________________________________
dropout_2 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 11)                11275     
Total para

KeyboardInterrupt: 

In [7]:
def get_steps(num_samples, batch_size):
    """Return the number of batches needed to cover ``num_samples`` items.

    Args:
        num_samples: Total number of items to process.
        batch_size: Number of items per batch.

    Returns:
        ``num_samples // batch_size``, plus one extra batch when the division
        leaves a positive remainder.
    """
    full_batches, leftover = divmod(num_samples, batch_size)
    return full_batches + 1 if leftover > 0 else full_batches

In [8]:
from keras.models import load_model

# Only the 'No' column is needed here: it is used as the image file-name column below.
dataset_test = pd.read_csv(os.path.join(LABEL_PATH,'shape_color_prediction.csv'))
dataset_test = dataset_test[['No']]

# Same 1/255 rescaling as the training generator; no labels (class_mode=None) and
# shuffle=False so predictions come back in dataframe row order.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=dataset_test,
    directory=DATA_PATH,
    x_col='No',
    y_col=None,
    target_size=params['img_size'],
    class_mode=None,
    batch_size=params['batch_size'],
    shuffle=False)

# flow_from_dataframe validates the file names (see its "validated image filenames"
# message) and may yield fewer images than the CSV has rows. Predicting with a step
# count based on the CSV would then wrap around the iterator and lose row alignment,
# so fail early and size the run from the generator itself.
if test_generator.samples != len(dataset_test):
    raise ValueError(
        'Test generator found {} usable images but the CSV lists {} rows; '
        'predictions would not line up with the CSV.'.format(
            test_generator.samples, len(dataset_test)))

params.update({
    'nb_test_samples': test_generator.samples
})

# Custom metric objects needed to deserialize the saved model.
dependencies = {
    'top_2': top_2,
    'top_3': top_3,
    'top_5': top_5
}

model = keras.models.load_model(os.path.join(SAVE_PATH, 'VGG16_shape_ep004_vloss-0.9149_vacc-0.8872.h5'), custom_objects=dependencies)

# get_steps rounds up, so the final partial batch is predicted as well.
prediction = model.predict_generator(generator = test_generator,
                                     steps = get_steps(params['nb_test_samples'], params['batch_size']),
                                     verbose=1,
                                     workers=params['nb_workers'])

Found 20959 validated image filenames.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.


In [9]:
# Index of the highest-scoring class for every test image.
predicted_class_indices = np.argmax(prediction, axis=1)

# The training generator maps class name -> index; invert it to decode the indices.
labels = {index: name for name, index in train_generator.class_indices.items()}
predictions = [labels[class_index] for class_index in predicted_class_indices]

# Add the top-1 prediction as a new column and write the CSV back in place.
submission_csv = os.path.join(LABEL_PATH, 'shape_color_prediction.csv')
submission = pd.read_csv(submission_csv).assign(aug_shape_top1=predictions)
submission.to_csv(submission_csv, index=False)

In [None]:
# def top_k_label(K):
#     top_list = []
#     predicted_class_indices=np.argmax(prediction, axis=1)
#     labels = (train_generator.class_indices)
#     labels = dict((v,k) for k,v in labels.items())
#     class_probs = prediction
#     for i, l in enumerate(predictions): # idx, label
#         class_prob = class_probs[i]
#         top_values = (-class_prob).argsort()[:K].tolist() # k 개까지 높은 확률 인덱스 저장
#         top_list.append(top_values)
        
#     top_arr = np.zeros((len(top_list), K))
    
#     for i in range(len(top_list)): # key - values 값 바꾸기
#         for j in range(K):
#             tmp = top_list[i][j]
#             top_arr[i][j] = labels[tmp]
    
#     label_list = top_arr.tolist()
#     return label_list


# # top_k_label(3)

In [None]:
# # top-k csv파일에 저장하기

# df = pd.read_csv(os.path.join(LABEL_PATH,'shape_color_prediction.csv'))
# df['aug_color_top3'] = top_k_label(3)
# df.to_csv(os.path.join(LABEL_PATH, 'shape_color_prediction.csv'), index=False)
# df.head()

In [10]:
# Top-1 accuracy: the fraction of rows whose 'shape' value (presumably the ground-truth
# label - confirm against the CSV) equals the predicted 'aug_shape_top1' value.
df = pd.read_csv(os.path.join(LABEL_PATH,'shape_color_prediction.csv'))

# Compare the two columns in one vectorised operation instead of a per-row Python loop.
count = int((df['shape'] == df['aug_shape_top1']).sum())
print(count/len(df))

0.9554368051910873
