In [65]:
import csv
import datetime
import os
import random

import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.layers import Activation
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dense
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential, Model
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.applications.resnet50 import ResNet50
from keras.applications import VGG16
from keras.applications.inception_v3 import InceptionV3
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from common import *



In [66]:
def path_to_tensor(img_path):
    # loads RGB image as PIL.Image.Image type
    img = image.load_img(img_path, target_size=(224, 224))
    # convert PIL.Image.Image type to 3D tensor with shape (224, 224, 3)
    x = image.img_to_array(img)
    # convert 3D tensor to 4D tensor with shape (1, 224, 224, 3) and return 4D tensor
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

def display_images(image_paths):
    html_content = "<div width='100%'>"
    for image_path in image_paths:
        html_content += '<div style="font-size: 10px; display:inline-block; width: 224px; border:1px solid black">\
         {image_path}:\
         <img src="{image_path}" style="display:inline-block;"> </div>'.format(image_path=image_path)
    html_content += '</div>'
    display(HTML(html_content))


In [67]:
print(".. setting base configuration ..")

random.seed(34283428)
base_dir_path = '.'
best_model_filepath = '{}/saved-models/model1.hdf5'.format(base_dir_path)


.. setting base configuration ..


In [89]:
print(".. setting model parameters ..")

training_epochs = 100

.. setting model parameters ..


In [90]:
print(".. loading & splitting data ..")
section_start_time = datetime.datetime.utcnow()

image_to_label = {}
images_list = []
label_list = []
with open('{base_dir_path}/data-labels/images.csv'.format(base_dir_path=base_dir_path),
          newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        image_name = row['IMAGE FILENAME'].strip()
        is_positive = 1 if row['IS POSITIVE'] else 0
        if image_name:
            image_path = '{base_dir_path}/data-images/{image_name}'.format(base_dir_path=base_dir_path,
                                                                           image_name=image_name)
            image_to_label[image_path] = is_positive
            images_list.append(image_path)
            label_list.append(is_positive)

label_list_categorical = to_categorical(label_list)

X_train, X_test, y_train, y_test = train_test_split(np.array(images_list),
                                                    np.array(label_list_categorical),
                                                    test_size=0.20,
                                                    random_state=42)

X_train, X_validate, y_train, y_validate = train_test_split(X_train,
                                                            y_train,
                                                            test_size=0.20,
                                                            random_state=42)

train_tensors = paths_to_tensor(X_train).astype('float32')
test_tensors = paths_to_tensor(X_test).astype('float32')
valid_tensors = paths_to_tensor(X_validate).astype('float32')

duration_loading = (datetime.datetime.utcnow() - section_start_time).total_seconds()

  2%|▏         | 16/896 [00:00<00:05, 159.34it/s]

.. loading & splitting data ..


100%|██████████| 896/896 [00:04<00:00, 179.87it/s]
100%|██████████| 280/280 [00:01<00:00, 197.29it/s]
100%|██████████| 224/224 [00:01<00:00, 182.15it/s]


In [91]:
# external_model = InceptionV3(weights='imagenet', include_top=False)
# external_model = ResNet50(weights='imagenet', include_top=False)
external_model = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(224, 224, 3))

In [92]:
print(".. constructing the transfer model ..")

for layer in external_model.layers:
    layer.trainable = False

temp = Conv2D(filters=128, kernel_size=2, padding='same')(external_model.layers[-13].output)

temp = BatchNormalization()(temp)
temp = Activation('relu')(temp)
temp = MaxPooling2D(pool_size=(2, 2))(temp)
temp = GlobalAveragePooling2D()(temp)
predictions = Dense(2, activation='softmax')(temp)

model = Model(input=external_model.input, output=predictions)

model.summary()    

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    

.. constructing the transfer model ..
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_21 (InputLayer)        (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56

  


In [93]:
print(".. training the model ..")
section_start_time = datetime.datetime.utcnow()

checkpointer = ModelCheckpoint(filepath=best_model_filepath,
                               verbose=1,
                               save_best_only=True)

model.fit(train_tensors, y_train,
          validation_data=(valid_tensors, y_validate),
          epochs=training_epochs,
          batch_size=20,
          callbacks=[checkpointer],
          verbose=1)

duration_training = (datetime.datetime.utcnow() - section_start_time).total_seconds()


.. training the model ..
Train on 896 samples, validate on 224 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.56935, saving model to ./saved-models/model1.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 0.56935 to 0.18326, saving model to ./saved-models/model1.hdf5
Epoch 3/100

Epoch 00003: val_loss improved from 0.18326 to 0.16532, saving model to ./saved-models/model1.hdf5
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.16532
Epoch 5/100

Epoch 00005: val_loss improved from 0.16532 to 0.15319, saving model to ./saved-models/model1.hdf5
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.15319
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.15319
Epoch 8/100

Epoch 00008: val_loss improved from 0.15319 to 0.14498, saving model to ./saved-models/model1.hdf5
Epoch 9/100

Epoch 00009: val_loss improved from 0.14498 to 0.12462, saving model to ./saved-models/model1.hdf5
Epoch 10/100

Epoch 00010: val_loss did not improve from 0.12462
Epoc


Epoch 00041: val_loss did not improve from 0.10200
Epoch 42/100

Epoch 00042: val_loss did not improve from 0.10200
Epoch 43/100

Epoch 00043: val_loss did not improve from 0.10200
Epoch 44/100

Epoch 00044: val_loss did not improve from 0.10200
Epoch 45/100

Epoch 00045: val_loss did not improve from 0.10200
Epoch 46/100

Epoch 00046: val_loss did not improve from 0.10200
Epoch 47/100

Epoch 00047: val_loss did not improve from 0.10200
Epoch 48/100

Epoch 00048: val_loss did not improve from 0.10200
Epoch 49/100

Epoch 00049: val_loss did not improve from 0.10200
Epoch 50/100

Epoch 00050: val_loss did not improve from 0.10200
Epoch 51/100

Epoch 00051: val_loss improved from 0.10200 to 0.09891, saving model to ./saved-models/model1.hdf5
Epoch 52/100

Epoch 00052: val_loss did not improve from 0.09891
Epoch 53/100

Epoch 00053: val_loss did not improve from 0.09891
Epoch 54/100

Epoch 00054: val_loss did not improve from 0.09891
Epoch 55/100

Epoch 00055: val_loss did not improve fro


Epoch 00084: val_loss did not improve from 0.09778
Epoch 85/100

Epoch 00085: val_loss did not improve from 0.09778
Epoch 86/100

Epoch 00086: val_loss did not improve from 0.09778
Epoch 87/100

Epoch 00087: val_loss did not improve from 0.09778
Epoch 88/100

Epoch 00088: val_loss did not improve from 0.09778
Epoch 89/100

Epoch 00089: val_loss did not improve from 0.09778
Epoch 90/100

Epoch 00090: val_loss did not improve from 0.09778
Epoch 91/100

Epoch 00091: val_loss did not improve from 0.09778
Epoch 92/100

Epoch 00092: val_loss did not improve from 0.09778
Epoch 93/100

Epoch 00093: val_loss did not improve from 0.09778
Epoch 94/100

Epoch 00094: val_loss did not improve from 0.09778
Epoch 95/100

Epoch 00095: val_loss did not improve from 0.09778
Epoch 96/100

Epoch 00096: val_loss did not improve from 0.09778
Epoch 97/100

Epoch 00097: val_loss did not improve from 0.09778
Epoch 98/100

Epoch 00098: val_loss did not improve from 0.09778
Epoch 99/100

Epoch 00099: val_loss di

In [94]:
print(".. loading best weights ..")

model.load_weights(best_model_filepath)

print(".. testing the model ..")
section_start_time = datetime.datetime.utcnow()

# tmp_predictions = [model.predict(np.expand_dims(tensor, axis=0)) for tensor in test_tensors]
test_predictions = [np.argmax(model.predict(np.expand_dims(tensor, axis=0))) for tensor in tqdm(test_tensors)]

duration_testing = (datetime.datetime.utcnow() - section_start_time).total_seconds()



.. loading best weights ..


  0%|          | 0/280 [00:00<?, ?it/s]

.. testing the model ..


100%|██████████| 280/280 [01:04<00:00,  4.53it/s]


In [95]:
print(".. analyzing results ..")

true_positives = 0
false_positives = 0
true_negatives = 0
false_negatives = 0
all_positives = 0
all_test = len(X_test)

false_positive_images = []
false_negative_images = []

for n, test_image in enumerate(tqdm(X_test)):
    prediction_label = True if test_predictions[n] else False
    truth_label = True if y_test[n][1] else False
    correct_prediction = prediction_label == truth_label

#     print("{indicator} {truth_icon} {test_image} (is {truth} predicted {prediction})".format(
#         indicator=' ' if correct_prediction else 'X',
#         truth_icon='|' if truth_label else '=',
#         test_image=test_image,
#         truth=truth_label,
#         prediction=prediction_label,
#     ))

    if truth_label:
        all_positives += 1

    if prediction_label:
        if truth_label:
            true_positives += 1
        else:
            false_positives += 1
            false_positive_images.append(test_image)
    else:
        if truth_label:
            false_negatives += 1
            false_negative_images.append(test_image)
        else:
            true_negatives += 1


100%|██████████| 280/280 [00:00<00:00, 94080.36it/s]

.. analyzing results ..





In [96]:
print("________________________________________________________")
print("SUMMARY:\n")
print("loading duration: {0:.1f} seconds".format(duration_loading))
print("training duration: {0:.1f} seconds".format(duration_training))
print("testing duration: {0:.1f} seconds".format(duration_testing))

print("all: ", all_test)
print("all_positives: ", all_positives)
print("true_positives: ", true_positives)
print("true_negatives: ", true_negatives)
print("false_positives: ", false_positives)
print("false_negatives: ", false_negatives)

print("RECALL: {0:.2f}%".format(100 * true_positives / all_positives))
print("ACCURACY: {0:.2f}%".format(100 * (true_positives + true_negatives) / all_test))


________________________________________________________
SUMMARY:

loading duration: 9.2 seconds
training duration: 29077.9 seconds
testing duration: 64.8 seconds
all:  280
all_positives:  56
true_positives:  51
true_negatives:  218
false_positives:  6
false_negatives:  5
RECALL: 91.07%
ACCURACY: 96.07%


## False Positives (first 20):

In [97]:
display_images(false_positive_images[:20])

## False Negatives (first 20):

In [98]:
display_images(false_negative_images[:20])