In [19]:
import csv
import datetime
import os
import random

import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.layers import Activation
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dense
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.preprocessing import image
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from common import *



In [20]:
def path_to_tensor(img_path):
    # loads RGB image as PIL.Image.Image type
    img = image.load_img(img_path, target_size=(224, 224))
    # convert PIL.Image.Image type to 3D tensor with shape (224, 224, 3)
    x = image.img_to_array(img)
    # convert 3D tensor to 4D tensor with shape (1, 224, 224, 3) and return 4D tensor
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

def display_images(image_paths):
    html_content = "<div width='100%'>"
    for image_path in image_paths:
        html_content += '<div style="font-size: 10px; display:inline-block; width: 224px; border:1px solid black">\
         {image_path}:\
         <img src="{image_path}" style="display:inline-block;"> </div>'.format(image_path=image_path)
    html_content += '</div>'
    display(HTML(html_content))


In [21]:
print(".. setting base configuration ..")

random.seed(34283428)
base_dir_path = '.'
best_model_filepath = '{}/saved-models/model1.hdf5'.format(base_dir_path)


.. setting base configuration ..


In [22]:
print(".. setting model parameters ..")

training_epochs = 100

.. setting model parameters ..


In [23]:
print(".. loading & splitting data ..")
section_start_time = datetime.datetime.utcnow()

image_to_label = {}
images_list = []
label_list = []
with open('{base_dir_path}/data-labels/images.csv'.format(base_dir_path=base_dir_path),
          newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        image_name = row['IMAGE FILENAME'].strip()
        is_positive = 1 if row['IS POSITIVE'] else 0
        if image_name:
            image_path = '{base_dir_path}/data-images/{image_name}'.format(base_dir_path=base_dir_path,
                                                                           image_name=image_name)
            image_to_label[image_path] = is_positive
            images_list.append(image_path)
            label_list.append(is_positive)

label_list_categorical = to_categorical(label_list)

X_train, X_test, y_train, y_test = train_test_split(np.array(images_list),
                                                    np.array(label_list_categorical),
                                                    test_size=0.20,
                                                    random_state=42)

X_train, X_validate, y_train, y_validate = train_test_split(X_train,
                                                            y_train,
                                                            test_size=0.20,
                                                            random_state=42)

train_tensors = paths_to_tensor(X_train).astype('float32')
test_tensors = paths_to_tensor(X_test).astype('float32')
valid_tensors = paths_to_tensor(X_validate).astype('float32')

duration_loading = (datetime.datetime.utcnow() - section_start_time).total_seconds()

  2%|▏         | 15/896 [00:00<00:05, 148.44it/s]

.. loading & splitting data ..


100%|██████████| 896/896 [00:03<00:00, 266.48it/s]
100%|██████████| 280/280 [00:01<00:00, 272.49it/s]
100%|██████████| 224/224 [00:00<00:00, 268.70it/s]


In [24]:
print(".. constructing the model ..")

model = Sequential()

model.add(Conv2D(filters=16, kernel_size=2, padding='same', input_shape=(224, 224, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(filters=32, kernel_size=2, padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(filters=64, kernel_size=2, padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(GlobalAveragePooling2D())
model.add(Dense(2, activation='softmax'))

model.summary()

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])


.. constructing the model ..
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 224, 224, 16)      208       
_________________________________________________________________
batch_normalization_4 (Batch (None, 224, 224, 16)      64        
_________________________________________________________________
activation_4 (Activation)    (None, 224, 224, 16)      0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 112, 112, 16)      0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 112, 112, 32)      2080      
_________________________________________________________________
batch_normalization_5 (Batch (None, 112, 112, 32)      128       
_________________________________________________________________
activation_5 (Activation)    (None, 112, 112, 3

In [25]:
print(".. training the model ..")
section_start_time = datetime.datetime.utcnow()

checkpointer = ModelCheckpoint(filepath=best_model_filepath,
                               verbose=1,
                               save_best_only=True)

model.fit(train_tensors, y_train,
          validation_data=(valid_tensors, y_validate),
          epochs=training_epochs,
          batch_size=20,
          callbacks=[checkpointer],
          verbose=1)

duration_training = (datetime.datetime.utcnow() - section_start_time).total_seconds()


.. training the model ..
Train on 896 samples, validate on 224 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.32806, saving model to ./saved-models/model1.hdf5
Epoch 2/100

Epoch 00002: val_loss did not improve from 0.32806
Epoch 3/100

Epoch 00003: val_loss did not improve from 0.32806
Epoch 4/100

Epoch 00004: val_loss improved from 0.32806 to 0.32072, saving model to ./saved-models/model1.hdf5
Epoch 5/100

Epoch 00005: val_loss improved from 0.32072 to 0.23709, saving model to ./saved-models/model1.hdf5
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.23709
Epoch 7/100

Epoch 00007: val_loss did not improve from 0.23709
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.23709
Epoch 9/100

Epoch 00009: val_loss improved from 0.23709 to 0.20989, saving model to ./saved-models/model1.hdf5
Epoch 10/100

Epoch 00010: val_loss did not improve from 0.20989
Epoch 11/100

Epoch 00011: val_loss did not improve from 0.20989
Epoch 12/100

Epoch 00012: val_loss


Epoch 00042: val_loss did not improve from 0.14194
Epoch 43/100

Epoch 00043: val_loss did not improve from 0.14194
Epoch 44/100

Epoch 00044: val_loss did not improve from 0.14194
Epoch 45/100

Epoch 00045: val_loss did not improve from 0.14194
Epoch 46/100

Epoch 00046: val_loss did not improve from 0.14194
Epoch 47/100

Epoch 00047: val_loss did not improve from 0.14194
Epoch 48/100

Epoch 00048: val_loss did not improve from 0.14194
Epoch 49/100

Epoch 00049: val_loss did not improve from 0.14194
Epoch 50/100

Epoch 00050: val_loss did not improve from 0.14194
Epoch 51/100

Epoch 00051: val_loss did not improve from 0.14194
Epoch 52/100

Epoch 00052: val_loss did not improve from 0.14194
Epoch 53/100

Epoch 00053: val_loss did not improve from 0.14194
Epoch 54/100

Epoch 00054: val_loss did not improve from 0.14194
Epoch 55/100

Epoch 00055: val_loss did not improve from 0.14194
Epoch 56/100

Epoch 00056: val_loss did not improve from 0.14194
Epoch 57/100

Epoch 00057: val_loss di


Epoch 00084: val_loss did not improve from 0.11411
Epoch 85/100

Epoch 00085: val_loss did not improve from 0.11411
Epoch 86/100

Epoch 00086: val_loss did not improve from 0.11411
Epoch 87/100

Epoch 00087: val_loss did not improve from 0.11411
Epoch 88/100

Epoch 00088: val_loss did not improve from 0.11411
Epoch 89/100

Epoch 00089: val_loss did not improve from 0.11411
Epoch 90/100

Epoch 00090: val_loss did not improve from 0.11411
Epoch 91/100

Epoch 00091: val_loss did not improve from 0.11411
Epoch 92/100

Epoch 00092: val_loss did not improve from 0.11411
Epoch 93/100

Epoch 00093: val_loss did not improve from 0.11411
Epoch 94/100

Epoch 00094: val_loss did not improve from 0.11411
Epoch 95/100

Epoch 00095: val_loss did not improve from 0.11411
Epoch 96/100

Epoch 00096: val_loss did not improve from 0.11411
Epoch 97/100

Epoch 00097: val_loss did not improve from 0.11411
Epoch 98/100

Epoch 00098: val_loss did not improve from 0.11411
Epoch 99/100

Epoch 00099: val_loss di

In [26]:
print(".. loading best weights ..")

model.load_weights(best_model_filepath)

print(".. testing the model ..")
section_start_time = datetime.datetime.utcnow()

tmp_predictions = [model.predict(np.expand_dims(tensor, axis=0)) for tensor in test_tensors]
test_predictions = [np.argmax(model.predict(np.expand_dims(tensor, axis=0))) for tensor in test_tensors]

duration_testing = (datetime.datetime.utcnow() - section_start_time).total_seconds()



.. loading best weights ..
.. testing the model ..


In [27]:
print(".. analyzing results ..")

true_positives = 0
false_positives = 0
true_negatives = 0
false_negatives = 0
all_positives = 0
all_test = len(X_test)

false_positive_images = []
false_negative_images = []

for n, test_image in enumerate(X_test):
    prediction_label = True if test_predictions[n] else False
    truth_label = True if y_test[n][1] else False
    correct_prediction = prediction_label == truth_label

#     print("{indicator} {truth_icon} {test_image} (is {truth} predicted {prediction})".format(
#         indicator=' ' if correct_prediction else 'X',
#         truth_icon='|' if truth_label else '=',
#         test_image=test_image,
#         truth=truth_label,
#         prediction=prediction_label,
#     ))

    if truth_label:
        all_positives += 1

    if prediction_label:
        if truth_label:
            true_positives += 1
        else:
            false_positives += 1
            false_positive_images.append(test_image)
    else:
        if truth_label:
            false_negatives += 1
            false_negative_images.append(test_image)
        else:
            true_negatives += 1


.. analyzing results ..


In [28]:
print("________________________________________________________")
print("SUMMARY:\n")
print("loading duration: {0:.1f} seconds".format(duration_loading))
print("training duration: {0:.1f} seconds".format(duration_training))
print("testing duration: {0:.1f} seconds".format(duration_testing))

print("all: ", all_test)
print("all_positives: ", all_positives)
print("true_positives: ", true_positives)
print("true_negatives: ", true_negatives)
print("false_positives: ", false_positives)
print("false_negatives: ", false_negatives)

print("RECALL: {0:.2f}%".format(100 * true_positives / all_positives))
print("ACCURACY: {0:.2f}%".format(100 * (true_positives + true_negatives) / all_test))


________________________________________________________
SUMMARY:

loading duration: 7.0 seconds
training duration: 10939.9 seconds
testing duration: 31.8 seconds
all:  280
all_positives:  56
true_positives:  45
true_negatives:  211
false_positives:  13
false_negatives:  11
RECALL: 80.36%
ACCURACY: 91.43%


## False Positives (first 20):

In [29]:
display_images(false_positive_images[:20])

## False Negatives (first 20):

In [30]:
display_images(false_negative_images[:20])