In [1]:
import pandas as pd
import numpy as np
import os
import cv2
import multiprocessing
from matplotlib import pyplot as plt

import keras
from keras import layers, models
from keras import Input
from keras.models import Model
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers, initializers, regularizers, metrics
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
# Shape name (Korean) -> integer class id, numbered in listing order.
_shape_names = ['삼각형', '사각형', '오각형', '육각형', '팔각형',
                '원형', '반원형', '타원형', '장방형', '마름모형', '기타']
shape_label = {name: code for code, name in enumerate(_shape_names)}
shape_label

# Color name (Korean) -> integer class id, numbered in listing order.
_color_names = ['빨강', '주황', '노랑', '연두', '초록',
                '청록', '파랑', '남색', '보라', '분홍',
                '자주', '갈색', '회색', '검정', '하양', '투명']
color_label = {name: code for code, name in enumerate(_color_names)}
color_label

{'빨강': 0,
 '주황': 1,
 '노랑': 2,
 '연두': 3,
 '초록': 4,
 '청록': 5,
 '파랑': 6,
 '남색': 7,
 '보라': 8,
 '분홍': 9,
 '자주': 10,
 '갈색': 11,
 '회색': 12,
 '검정': 13,
 '하양': 14,
 '투명': 15}

In [3]:
# Directory layout, given as paths relative to the working directory.
DATA_PATH = '../data/mask_front'  # passed as `directory=` to every flow_from_dataframe call
LABEL_PATH = '../label'           # holds shape.csv, color.csv and shape_color_prediction.csv
SAVE_PATH = '../save_model'       # holds the saved .h5 model checkpoints that are loaded for inference

In [4]:
# Shared run configuration. The sample-count entries ('nb_train_samples',
# 'nb_validation_samples', 'nb_test_samples') are added by later cells once
# the corresponding data has been loaded.
params = dict(
    img_size=(224, 224),           # target (height, width) handed to the generators
    input_shape=(224, 224, 3),
    batch_size=24,
    epochs=10,
    nb_workers=multiprocessing.cpu_count(),  # one worker per CPU core
)

In [5]:
# Used when the label CSV already exists: load it and turn every shape label
# into a string (presumably because the categorical generators expect string
# class labels -- confirm).
dataset = pd.read_csv(os.path.join(LABEL_PATH, 'shape.csv'))
dataset = dataset.assign(shape=dataset['shape'].map(str))

In [6]:
from sklearn.model_selection import train_test_split

# Keep only the image identifier and the shape label.
dataset = dataset[['No', 'shape']]

# Seeded 80/20 split over row positions.
# NOTE(review): in the visible cells X_train / X_val are only used for their
# sizes; the generators split `dataset` themselves via validation_split.
its = np.arange(dataset.shape[0])
train_idx, val_idx = train_test_split(its, train_size=0.8, random_state=42)
X_train, X_val = dataset.iloc[train_idx, :], dataset.iloc[val_idx, :]

# Record the split sizes in the shared configuration.
params['nb_train_samples'] = len(X_train)
params['nb_validation_samples'] = len(X_val)

print(X_train.shape, X_val.shape, sep='\n')

(16767, 2)
(4192, 2)


In [7]:
def get_steps(num_samples, batch_size):
    """Return how many batches of `batch_size` are needed to cover `num_samples`.

    A trailing partial batch (positive remainder) counts as one extra step.
    """
    full_batches, leftover = divmod(num_samples, batch_size)
    return full_batches + 1 if leftover > 0 else full_batches

In [8]:
# Top-k accuracy metrics, named so they can be looked up through
# `custom_objects` when a saved model is loaded.
from functools import partial


def _top_k_metric(k):
    """Build a top-k categorical accuracy metric named 'top_<k>'.

    Every metric is its own `partial` object, so setting `__name__` never
    touches `keras.metrics.top_k_categorical_accuracy` itself. (Renaming the
    library function directly would change its name for every other user of
    it in the same process.)
    """
    metric = partial(keras.metrics.top_k_categorical_accuracy, k=k)
    metric.__name__ = 'top_{}'.format(k)
    return metric


top_2 = _top_k_metric(2)
top_3 = _top_k_metric(3)
top_5 = _top_k_metric(5)   # k=5 is also the library default, so behaviour is unchanged
top_10 = _top_k_metric(10)

In [9]:
# Pixel values are rescaled by 1/255; 20% of the rows are reserved as the
# 'validation' subset.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Arguments common to the training and validation flows.
_shape_flow_kwargs = dict(
    dataframe=dataset,
    directory=DATA_PATH,
    x_col='No',
    y_col='shape',
    class_mode='categorical',
    target_size=params['img_size'],
    batch_size=params['batch_size'],
)

train_generator = datagen.flow_from_dataframe(subset='training', **_shape_flow_kwargs)

# Validation batches keep the dataframe order.
valid_generator = datagen.flow_from_dataframe(subset='validation',
                                              shuffle=False,
                                              **_shape_flow_kwargs)

Found 16768 validated image filenames belonging to 11 classes.
Found 4191 validated image filenames belonging to 11 classes.


In [10]:
from keras.models import load_model

# Images to predict on: only the identifier column is needed.
# NOTE(review): the recorded output reports 20959 files here, the same total as
# the training + validation subsets (16768 + 4191) -- confirm whether this CSV
# covers the same images the model was trained on.
dataset_test = pd.read_csv(os.path.join(LABEL_PATH, 'shape_color_prediction.csv'))[['No']]
params['nb_test_samples'] = len(dataset_test)

# Unlabelled, unshuffled flow so predictions come back in dataframe order.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=dataset_test,
    directory=DATA_PATH,
    x_col='No',
    y_col=None,
    class_mode=None,
    target_size=params['img_size'],
    batch_size=params['batch_size'],
    shuffle=False,
)

# Custom metric objects the saved model needs in order to deserialize.
dependencies = dict(top_2=top_2, top_3=top_3, top_5=top_5)
shape_model_file = os.path.join(SAVE_PATH, 'VGG16_shape_ep004_vloss-0.1574_vacc-0.9759.h5')
model = load_model(shape_model_file, custom_objects=dependencies)

# Enough steps to cover every sample, including a final partial batch.
n_steps = get_steps(params['nb_test_samples'], params['batch_size'])
prediction = model.predict_generator(generator=test_generator,
                                     steps=n_steps,
                                     verbose=1,
                                     workers=params['nb_workers'])

Found 20959 validated image filenames.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.


In [11]:
# Column index of the most probable class for every sample.
predicted_class_indices = np.argmax(prediction, axis=1)

# Invert the generator's label -> index mapping to decode the indices.
labels = {index: name for name, index in train_generator.class_indices.items()}
predictions = list(map(labels.__getitem__, predicted_class_indices))

# Disabled: write the top-1 shape prediction back to the prediction CSV.
# submission = pd.read_csv(os.path.join(LABEL_PATH, 'shape_color_prediction.csv'))
# submission['shape_prediction'] = predictions
# submission.to_csv(os.path.join(LABEL_PATH, 'shape_color_prediction.csv'), index=False)

In [12]:
################# DO NOT DELETE #################

def top_k_label(K, probs=None, class_indices=None):
    """Return the K most probable class labels for every sample, best first.

    Parameters
    ----------
    K : int
        Number of labels to keep per sample (0 <= K <= number of classes).
    probs : array-like of shape (n_samples, n_classes), optional
        Per-class scores. Defaults to the notebook-level `prediction`.
    class_indices : dict, optional
        Mapping of class label -> column index. Defaults to
        `train_generator.class_indices`.

    Returns
    -------
    list of list of float
        One list of K labels per sample. Labels are converted to float, so
        they must be numeric (e.g. '3' -> 3.0).

    Raises
    ------
    ValueError
        If `probs` is not two-dimensional, K is out of range, or a label
        cannot be converted to float.
    """
    # Fall back to the notebook globals only when the caller supplied nothing,
    # so existing `top_k_label(3)` calls keep working.
    if probs is None:
        probs = prediction
    if class_indices is None:
        class_indices = train_generator.class_indices

    probs = np.asarray(probs)
    if probs.size == 0:
        return []
    if probs.ndim != 2:
        raise ValueError('probs must be 2-D (n_samples, n_classes), got shape {}'.format(probs.shape))

    n_classes = probs.shape[1]
    if not 0 <= K <= n_classes:
        raise ValueError('K must be between 0 and {} (number of classes), got {}'.format(n_classes, K))

    # Column index -> numeric label.
    index_to_label = {index: float(label) for label, index in class_indices.items()}

    # Sort each row by descending score. A stable sort makes ties resolve to
    # the lowest column index deterministically.
    top_indices = np.argsort(-probs, axis=1, kind='mergesort')[:, :K]

    return [[index_to_label[index] for index in row] for row in top_indices.tolist()]


# top_k_label(3)

In [13]:
# Three best shape labels per sample, taken from the shape model's predictions.
shape_list = top_k_label(K=3)

In [None]:
############### color ##############

In [14]:
# Used when the label CSV already exists: load it and turn every front-color
# label into a string (presumably because the categorical generators expect
# string class labels -- confirm).
dataset = pd.read_csv(os.path.join(LABEL_PATH, 'color.csv'))
dataset = dataset.assign(color_front=dataset['color_front'].map(str))

In [15]:
from sklearn.model_selection import train_test_split

# Keep only the image identifier and the front-color label.
dataset = dataset[['No', 'color_front']]

# Seeded 80/20 split over row positions.
# NOTE(review): in the visible cells X_train / X_val are only used for their
# sizes; the generators split `dataset` themselves via validation_split.
its = np.arange(dataset.shape[0])
train_idx, val_idx = train_test_split(its, train_size=0.8, random_state=42)
X_train, X_val = dataset.iloc[train_idx, :], dataset.iloc[val_idx, :]

# Record the split sizes in the shared configuration.
params['nb_train_samples'] = len(X_train)
params['nb_validation_samples'] = len(X_val)

print(X_train.shape, X_val.shape, sep='\n')

(16767, 2)
(4192, 2)


In [16]:
# Pixel values are rescaled by 1/255; 20% of the rows are reserved as the
# 'validation' subset.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Arguments common to the training and validation flows.
_color_flow_kwargs = dict(
    dataframe=dataset,
    directory=DATA_PATH,
    x_col='No',
    y_col='color_front',
    class_mode='categorical',
    target_size=params['img_size'],
    batch_size=params['batch_size'],
)

train_generator = datagen.flow_from_dataframe(subset='training', **_color_flow_kwargs)

# Validation batches keep the dataframe order.
valid_generator = datagen.flow_from_dataframe(subset='validation',
                                              shuffle=False,
                                              **_color_flow_kwargs)

Found 16768 validated image filenames belonging to 16 classes.
Found 4191 validated image filenames belonging to 16 classes.


In [17]:
from keras.models import load_model

# Images to predict on: only the identifier column is needed.
dataset_test = pd.read_csv(os.path.join(LABEL_PATH, 'shape_color_prediction.csv'))[['No']]
params['nb_test_samples'] = len(dataset_test)

# Unlabelled, unshuffled flow so predictions come back in dataframe order.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=dataset_test,
    directory=DATA_PATH,
    x_col='No',
    y_col=None,
    class_mode=None,
    target_size=params['img_size'],
    batch_size=params['batch_size'],
    shuffle=False,
)

# Custom metric objects the saved model needs in order to deserialize.
dependencies = dict(top_2=top_2, top_3=top_3, top_5=top_5)
color_model_file = os.path.join(SAVE_PATH, 'VGG16_color_ep007_vloss-0.4171_vacc-0.9003.h5')
model = load_model(color_model_file, custom_objects=dependencies)

# Enough steps to cover every sample, including a final partial batch.
n_steps = get_steps(params['nb_test_samples'], params['batch_size'])
prediction = model.predict_generator(generator=test_generator,
                                     steps=n_steps,
                                     verbose=1,
                                     workers=params['nb_workers'])

Found 20959 validated image filenames.


In [18]:
# Column index of the most probable class for every sample.
predicted_class_indices = np.argmax(prediction, axis=1)

# Invert the generator's label -> index mapping to decode the indices.
labels = {index: name for name, index in train_generator.class_indices.items()}
predictions = list(map(labels.__getitem__, predicted_class_indices))

# Disabled: write the top-1 color prediction back to the prediction CSV.
# submission = pd.read_csv(os.path.join(LABEL_PATH, 'shape_color_prediction.csv'))
# submission['color_prediction'] = predictions
# submission.to_csv(os.path.join(LABEL_PATH, 'shape_color_prediction.csv'), index=False)

In [19]:
################# DO NOT DELETE #################
# NOTE(review): a function with this name is already defined in the shape
# section of this notebook; this definition shadows it. Consider keeping a
# single definition.

def top_k_label(K, probs=None, class_indices=None):
    """Return the K most probable class labels for every sample, best first.

    Parameters
    ----------
    K : int
        Number of labels to keep per sample (0 <= K <= number of classes).
    probs : array-like of shape (n_samples, n_classes), optional
        Per-class scores. Defaults to the notebook-level `prediction`.
    class_indices : dict, optional
        Mapping of class label -> column index. Defaults to
        `train_generator.class_indices`.

    Returns
    -------
    list of list of float
        One list of K labels per sample. Labels are converted to float, so
        they must be numeric (e.g. '3' -> 3.0).

    Raises
    ------
    ValueError
        If `probs` is not two-dimensional, K is out of range, or a label
        cannot be converted to float.
    """
    # Fall back to the notebook globals only when the caller supplied nothing,
    # so existing `top_k_label(3)` calls keep working.
    if probs is None:
        probs = prediction
    if class_indices is None:
        class_indices = train_generator.class_indices

    probs = np.asarray(probs)
    if probs.size == 0:
        return []
    if probs.ndim != 2:
        raise ValueError('probs must be 2-D (n_samples, n_classes), got shape {}'.format(probs.shape))

    n_classes = probs.shape[1]
    if not 0 <= K <= n_classes:
        raise ValueError('K must be between 0 and {} (number of classes), got {}'.format(n_classes, K))

    # Column index -> numeric label.
    index_to_label = {index: float(label) for label, index in class_indices.items()}

    # Sort each row by descending score. A stable sort makes ties resolve to
    # the lowest column index deterministically.
    top_indices = np.argsort(-probs, axis=1, kind='mergesort')[:, :K]

    return [[index_to_label[index] for index in row] for row in top_indices.tolist()]


# top_k_label(3)

In [20]:
# Three best color labels per sample, taken from the color model's predictions.
color_list = top_k_label(K=3)

In [21]:
# Top-k performance comparison: fraction of rows where the true shape AND the
# true front color are both found among the leading shape / color predictions.
df = pd.read_csv(os.path.join(LABEL_PATH, 'shape_color_prediction.csv'))

# The comparison pairs row i of the CSV with prediction i, so all three
# sequences must line up. Fail loudly instead of scoring misaligned rows.
if not (len(df) == len(shape_list) == len(color_list)):
    raise ValueError(
        'row count mismatch: {} label rows, {} shape predictions, {} color predictions'.format(
            len(df), len(shape_list), len(color_list)))

_true_shape = df['shape'].tolist()
_true_color = df['color_front'].tolist()


def _joint_hits(k_shape, k_color):
    """Count rows whose true shape is in the first `k_shape` shape predictions
    and whose true color is in the first `k_color` color predictions."""
    return sum(
        1
        for shape, shape_top, color, color_top
        in zip(_true_shape, shape_list, _true_color, color_list)
        if shape in shape_top[:k_shape] and color in color_top[:k_color]
    )


top1 = _joint_hits(1, 1)
top2 = _joint_hits(2, 2)
top3 = _joint_hits(3, 3)
s2_c3 = _joint_hits(2, 3)
s3_c2 = _joint_hits(3, 2)

print('top_1:', top1/len(df))
print('top_2:', top2/len(df))
print('top_3:', top3/len(df))
print('shape_top_2, color_top_3:', s2_c3/len(df))
print('shape_top_3, color_top_2:', s3_c2/len(df))

top_1: 0.8979436041795887
top_2: 0.9770981439954196
top_3: 0.9902189989980438
shape_top_2, color_top_3: 0.9877379645975476
shape_top_3, color_top_2: 0.9795791783959158
