In [11]:
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy import ndimage, misc
from PIL import Image
from IPython.display import Image, display
from glob import glob
from matplotlib.image import imread
from keras.models import Sequential, Model, load_model
from keras.utils import plot_model
from keras.layers import *
from keras.callbacks import *
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD, adam
from sklearn.metrics import classification_report, confusion_matrix
from keras.applications import *

# Seed NumPy's global random generator with a named constant.
SEED = 3
np.random.seed(SEED)

In [13]:
# By Augmentation, Accuracy 91.95% -> 93.77%

# Pre-processing shared by every split: scale pixel values by 1/255 and
# centre each sample individually.
preprocessing = dict(rescale=1./255, samplewise_center=True)

# Only the training split gets random augmentation on top of the shared
# pre-processing (small shifts, shear, zoom, rotation, flips, channel shift).
train_datagen = ImageDataGenerator(
    height_shift_range=0.05,
    width_shift_range=0.05,
    shear_range=5,
    zoom_range=0.05,
    rotation_range=3,
    horizontal_flip=True,
    vertical_flip=True,
    channel_shift_range=15,
    **preprocessing)

# Test and validation images are only pre-processed, never augmented.
test_datagen = ImageDataGenerator(**preprocessing)
val_datagen = ImageDataGenerator(**preprocessing)

# Training hyper-parameters (earlier values that were tried are kept for the record).
batch_size = 32  # 16 -> 64 -> 32
epochs = 200  # 50 -> 100 -> 200

# Dataset layout: <root_path>/{train,val,test}/<class name>/<files>
root_path = '/home/mywork/kijun_kwon/data_split1'
train_path = root_path + '/train'
val_path = root_path + '/val'
test_path = root_path + '/test'
path_list = [train_path, val_path, test_path]

# Class names are the sub-directories of the training split, in sorted order.
# Derived from train_path so it can never drift from root_path.
class_root = train_path
class_list = sorted(
    item for item in os.listdir(class_root)
    if os.path.isdir(os.path.join(class_root, item))
)


def _count_files(directory):
    """Return the number of regular files located directly inside `directory`."""
    return sum(
        os.path.isfile(os.path.join(directory, name))
        for name in os.listdir(directory)
    )


# Number of files per split, in path_list order:
# size of training set, validation set, test set.
# Every regular file is counted, whatever its extension.
sizesOfSet = [
    sum(_count_files(split_path + '/' + class_name) for class_name in class_list)
    for split_path in path_list
]

# Input geometry fed to the network.
image_height = 224
image_width = 224
channels = 3

In [22]:
# ResNet50 with randomly initialised weights (weights=None) and its own
# classification head, with one output per entry in class_list.
model = ResNet50(
    include_top=True,
    weights=None,
    input_tensor=None,
    input_shape=(image_height, image_width, channels),
    pooling=None,
    classes=len(class_list),
)

In [23]:
# Options common to all three directory iterators.
flow_options = dict(
    color_mode="rgb",
    target_size=(image_height, image_width),
    batch_size=batch_size,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(train_path, **flow_options)

# shuffle=False keeps the test batches in a fixed order so that predictions
# can be compared against test_generator.classes later in the notebook.
test_generator = test_datagen.flow_from_directory(
    test_path, shuffle=False, **flow_options)

val_generator = val_datagen.flow_from_directory(val_path, **flow_options)

Found 1965 images belonging to 7 classes.
Found 658 images belonging to 7 classes.
Found 655 images belonging to 7 classes.


In [24]:
# Multi-class setup: categorical cross-entropy loss, Adam optimizer with its
# default settings (selected by name), accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [25]:
# Save weights only, and only for epochs whose val_loss beats the best seen
# so far; the epoch number is embedded in the file name.
model_checkpoint = ModelCheckpoint(
    './ResNet50.{epoch:02d}.hdf5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
)

# Scale the learning rate by 0.3 once val_loss has stalled for 10 epochs.
reduce_learning_rate = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=10,
    verbose=1,
)

callbacks = [model_checkpoint, reduce_learning_rate]

In [None]:
# Train on the augmented training stream and validate after every epoch.
#
# Step counts are taken from the generators themselves: len(generator) is the
# number of batches needed to cover every image the generator actually found,
# including the final partial batch. Deriving them from raw file counts with
# floor division skipped that last batch, so each validation pass scored only
# part of the (shuffled) validation set and fed a noisier val_loss to the
# checkpoint and learning-rate callbacks.
history = model.fit_generator(
            train_generator,
            steps_per_epoch=len(train_generator),
            epochs=epochs,
            validation_data=val_generator,
            validation_steps=len(val_generator),
            callbacks=callbacks)

Epoch 1/200

Epoch 00001: val_loss improved from inf to 9.54752, saving model to ./ResNet50.01.hdf5
Epoch 2/200

Epoch 00002: val_loss improved from 9.54752 to 8.43329, saving model to ./ResNet50.02.hdf5
Epoch 3/200

Epoch 00003: val_loss did not improve from 8.43329
Epoch 4/200

Epoch 00004: val_loss did not improve from 8.43329
Epoch 5/200

Epoch 00005: val_loss improved from 8.43329 to 6.24125, saving model to ./ResNet50.05.hdf5
Epoch 6/200

Epoch 00006: val_loss improved from 6.24125 to 3.12767, saving model to ./ResNet50.06.hdf5
Epoch 7/200

Epoch 00007: val_loss did not improve from 3.12767
Epoch 8/200

Epoch 00008: val_loss improved from 3.12767 to 2.65845, saving model to ./ResNet50.08.hdf5
Epoch 9/200

Epoch 00009: val_loss did not improve from 2.65845
Epoch 10/200

Epoch 00010: val_loss did not improve from 2.65845
Epoch 11/200

Epoch 00011: val_loss improved from 2.65845 to 2.63538, saving model to ./ResNet50.11.hdf5
Epoch 12/200

Epoch 00012: val_loss improved from 2.63538 

In [None]:
def _plot_history_curves(history_dict, train_key, val_key, title, ylabel):
    """Plot one training curve and its validation counterpart against the epoch.

    history_dict -- mapping from metric name to its per-epoch list of values
    train_key    -- key of the training curve in history_dict
    val_key      -- key of the validation curve in history_dict
    title        -- figure title
    ylabel       -- label of the y axis
    """
    plt.plot(history_dict[train_key])
    plt.plot(history_dict[val_key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()


# Keras records the accuracy metric as 'acc' in some versions and as
# 'accuracy' in others; use whichever key is present instead of failing
# with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# Plot training & validation accuracy values
_plot_history_curves(history.history, acc_key, 'val_' + acc_key,
                     'Model accuracy', 'Accuracy')

# Plot training & validation loss values
_plot_history_curves(history.history, 'loss', 'val_loss',
                     'Model loss', 'Loss')

In [None]:
# Predict every test image exactly once.
# len(test_generator) is the number of batches needed to cover the images the
# generator found. The previous "file count // batch_size + 1" asked for one
# batch too many whenever the test size was an exact multiple of batch_size,
# which made the generator wrap around and produced more predictions than
# there are labels in test_generator.classes.
test_generator.reset()  # start from the first batch even if the cell is re-run
test_steps = len(test_generator)
Y_pred = model.predict_generator(test_generator, test_steps)
y_pred = np.argmax(Y_pred, axis=1)  # predicted class index per image

print('\tConfusion Matrix')
conf = confusion_matrix(test_generator.classes, y_pred)
print(conf)

print('\n\t\t\tClassification Report')
# The report is built twice: once as text for printing and once as a dict so
# individual numbers can be read back below.
report = classification_report(test_generator.classes, y_pred, target_names=class_list)
report_dict = classification_report(test_generator.classes, y_pred, target_names=class_list, output_dict = True)
print(report)

print("\n-- Evaluate --")
scores = model.evaluate_generator(test_generator, steps=test_steps)
# Rates derived from the 'ok' class:
#   under = share of images predicted 'ok' that are not actually 'ok'
#   over  = share of actual 'ok' images that were predicted as something else
under = 1-report_dict['ok']['precision']
over = 1-report_dict['ok']['recall']
# The Korean labels print `over` and `under` in that order
# (presumably over-detection rate and missed-detection rate — TODO confirm wording).
print("%s: %.2f\t%s: %.2f%%\n과검율: %.2f%%\t 미검율: %.2f%%"
      %(model.metrics_names[0], scores[0], model.metrics_names[1], scores[1]*100,
        over*100, under*100))

In [None]:
# For every misclassified test image record three parallel values:
#   error_truth -- its true class index
#   error_count -- its position inside the run of images sharing that class
#   error_pred  -- the class index the model predicted
# The position counter restarts whenever the true label differs from the
# previous image's label, so it relies on test_generator.classes listing each
# class as one contiguous run.
error_truth, error_count, error_pred = [], [], []
true_labels = test_generator.classes
position_in_class = 0
for idx, predicted in enumerate(y_pred):
    actual = true_labels[idx]
    if idx > 0 and true_labels[idx - 1] != actual:
        position_in_class = 0  # a new class run starts here
    if actual != predicted:
        error_truth.append(actual)
        error_count.append(position_in_class)
        error_pred.append(predicted)
    position_in_class += 1

In [None]:
# Predicted class index whose misclassifications are collected below.
# NOTE(review): presumably this is class_list.index('ok'), i.e. images wrongly
# accepted as 'ok' — confirm against class_list before relying on it.
target_pred_class = 2

# Build the per-class file lists from the generator itself.
# error_count holds positions in the generator's own file order, so the paths
# must come from test_generator.filenames. A separate glob('*.jpg') listing is
# returned in arbitrary order and ignores other image extensions, so indexing
# it with error_count could return the wrong image.
files_by_class = {}
for rel_path, label in zip(test_generator.filenames, test_generator.classes):
    files_by_class.setdefault(int(label), []).append(os.path.join(test_path, rel_path))

# Paths of the test images that were misclassified as target_pred_class,
# grouped by true class in class_list order.
underImageList = list()
for i in range(len(class_list)):
    lgc_file = files_by_class.get(i, [])
    for j in range(len(error_truth)):
        if i == error_truth[j] and error_pred[j] == target_pred_class:
            underImageList.append(lgc_file[error_count[j]])

In [None]:
# Bare expression: displays the collected image paths as this cell's output.
underImageList