In [21]:
import csv
import datetime
import os
import random

import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.layers import Activation
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dense
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.preprocessing import image
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from common import *



In [22]:
def path_to_tensor(img_path):
    # loads RGB image as PIL.Image.Image type
    img = image.load_img(img_path, target_size=(270, 270))
    # convert PIL.Image.Image type to 3D tensor with shape (270, 270, 3)
    x = image.img_to_array(img)
    # convert 3D tensor to 4D tensor with shape (1, 270, 270, 3) and return 4D tensor
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

def display_images(image_paths):
    html_content = "<div width='100%'>"
    for image_path in image_paths:
        html_content += '<div style="font-size: 10px; display:inline-block; width: 270px; border:1px solid black">\
         {image_path}:\
         <img src="{image_path}" style="display:inline-block;"> </div>'.format(image_path=image_path)
    html_content += '</div>'
    display(HTML(html_content))


In [23]:
print(".. setting base configuration ..")

random.seed(34283428)
base_dir_path = '.'
best_model_filepath = '{}/saved-models/model1.hdf5'.format(base_dir_path)


.. setting base configuration ..


In [24]:
print(".. setting model parameters ..")

training_epochs = 1

.. setting model parameters ..


In [25]:
print(".. loading & splitting data ..")
section_start_time = datetime.datetime.utcnow()

image_to_label = {}
images_list = []
label_list = []
with open('{base_dir_path}/data-labels/images.csv'.format(base_dir_path=base_dir_path),
          newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        image_name = row['IMAGE FILENAME'].strip()
        is_positive = 1 if row['IS POSITIVE'] else 0
        if image_name:
            image_path = '{base_dir_path}/data-images/{image_name}'.format(base_dir_path=base_dir_path,
                                                                           image_name=image_name)
            image_to_label[image_path] = is_positive
            images_list.append(image_path)
            label_list.append(is_positive)

label_list_categorical = to_categorical(label_list)

X_train, X_test, y_train, y_test = train_test_split(np.array(images_list),
                                                    np.array(label_list_categorical),
                                                    test_size=0.20,
                                                    random_state=42)

X_train, X_validate, y_train, y_validate = train_test_split(X_train,
                                                            y_train,
                                                            test_size=0.20,
                                                            random_state=42)

train_tensors = paths_to_tensor(X_train).astype('float32')
test_tensors = paths_to_tensor(X_test).astype('float32')
valid_tensors = paths_to_tensor(X_validate).astype('float32')

duration_loading = (datetime.datetime.utcnow() - section_start_time).total_seconds()

  0%|          | 0/896 [00:00<?, ?it/s]

.. loading & splitting data ..


100%|██████████| 896/896 [00:08<00:00, 105.26it/s]
100%|██████████| 280/280 [00:02<00:00, 112.32it/s]
100%|██████████| 224/224 [00:02<00:00, 107.71it/s]


In [13]:
print(".. constructing the model ..")

model = Sequential()

model.add(Conv2D(filters=16, kernel_size=2, padding='same', input_shape=(270, 270, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(filters=32, kernel_size=2, padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(filters=64, kernel_size=2, padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(GlobalAveragePooling2D())
model.add(Dense(2, activation='softmax'))

model.summary()

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])


.. constructing the model ..
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 270, 270, 16)      208       
_________________________________________________________________
batch_normalization_4 (Batch (None, 270, 270, 16)      64        
_________________________________________________________________
activation_4 (Activation)    (None, 270, 270, 16)      0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 135, 135, 16)      0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 135, 135, 32)      2080      
_________________________________________________________________
batch_normalization_5 (Batch (None, 135, 135, 32)      128       
_________________________________________________________________
activation_5 (Activation)    (None, 135, 135, 3

In [14]:
print(".. training the model ..")
section_start_time = datetime.datetime.utcnow()

checkpointer = ModelCheckpoint(filepath=best_model_filepath,
                               verbose=1,
                               save_best_only=True)

model.fit(train_tensors, y_train,
          validation_data=(valid_tensors, y_validate),
          epochs=training_epochs,
          batch_size=20,
          callbacks=[checkpointer],
          verbose=1)

duration_training = (datetime.datetime.utcnow() - section_start_time).total_seconds()


.. training the model ..
Train on 896 samples, validate on 224 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.45852, saving model to ./saved-models/model1.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 0.45852 to 0.31917, saving model to ./saved-models/model1.hdf5
Epoch 3/100

Epoch 00003: val_loss did not improve from 0.31917
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.31917
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.31917
Epoch 6/100

Epoch 00006: val_loss improved from 0.31917 to 0.23507, saving model to ./saved-models/model1.hdf5
Epoch 7/100

Epoch 00007: val_loss improved from 0.23507 to 0.23114, saving model to ./saved-models/model1.hdf5
Epoch 8/100

Epoch 00008: val_loss did not improve from 0.23114
Epoch 9/100

Epoch 00009: val_loss improved from 0.23114 to 0.21480, saving model to ./saved-models/model1.hdf5
Epoch 10/100

Epoch 00010: val_loss did not improve from 0.21480
Epoch 11/100

Epoch 00011: val_loss did not improve 


Epoch 00042: val_loss did not improve from 0.16719
Epoch 43/100

Epoch 00043: val_loss did not improve from 0.16719
Epoch 44/100

Epoch 00044: val_loss did not improve from 0.16719
Epoch 45/100

Epoch 00045: val_loss improved from 0.16719 to 0.16511, saving model to ./saved-models/model1.hdf5
Epoch 46/100

Epoch 00046: val_loss did not improve from 0.16511
Epoch 47/100

Epoch 00047: val_loss did not improve from 0.16511
Epoch 48/100

Epoch 00048: val_loss did not improve from 0.16511
Epoch 49/100

Epoch 00049: val_loss did not improve from 0.16511
Epoch 50/100

Epoch 00050: val_loss improved from 0.16511 to 0.14077, saving model to ./saved-models/model1.hdf5
Epoch 51/100

Epoch 00051: val_loss did not improve from 0.14077
Epoch 52/100

Epoch 00052: val_loss did not improve from 0.14077
Epoch 53/100

Epoch 00053: val_loss did not improve from 0.14077
Epoch 54/100

Epoch 00054: val_loss did not improve from 0.14077
Epoch 55/100

Epoch 00055: val_loss did not improve from 0.14077
Epoch 5

In [16]:
print(".. loading best weights ..")

model.load_weights(best_model_filepath)

print(".. testing the model ..")
section_start_time = datetime.datetime.utcnow()

test_predictions = [np.argmax(model.predict(np.expand_dims(tensor, axis=0))) for tensor in tqdm(test_tensors)]

duration_testing = (datetime.datetime.utcnow() - section_start_time).total_seconds()



.. loading best weights ..


  0%|          | 0/280 [00:00<?, ?it/s]

.. testing the model ..


100%|██████████| 280/280 [00:28<00:00, 11.60it/s]


In [17]:
print(".. analyzing results ..")

true_positives = 0
false_positives = 0
true_negatives = 0
false_negatives = 0
all_positives = 0
all_test = len(X_test)

false_positive_images = []
false_negative_images = []

for n, test_image in enumerate(X_test):
    prediction_label = True if test_predictions[n] else False
    truth_label = True if y_test[n][1] else False
    correct_prediction = prediction_label == truth_label

#     print("{indicator} {truth_icon} {test_image} (is {truth} predicted {prediction})".format(
#         indicator=' ' if correct_prediction else 'X',
#         truth_icon='|' if truth_label else '=',
#         test_image=test_image,
#         truth=truth_label,
#         prediction=prediction_label,
#     ))

    if truth_label:
        all_positives += 1

    if prediction_label:
        if truth_label:
            true_positives += 1
        else:
            false_positives += 1
            false_positive_images.append(test_image)
    else:
        if truth_label:
            false_negatives += 1
            false_negative_images.append(test_image)
        else:
            true_negatives += 1


.. analyzing results ..


In [18]:
print("________________________________________________________")
print("SUMMARY:\n")
print("loading duration: {0:.1f} seconds".format(duration_loading))
print("training duration: {0:.1f} seconds".format(duration_training))
print("testing duration: {0:.1f} seconds".format(duration_testing))

print("all: ", all_test)
print("all_positives: ", all_positives)
print("true_positives: ", true_positives)
print("true_negatives: ", true_negatives)
print("false_positives: ", false_positives)
print("false_negatives: ", false_negatives)

print("RECALL: {0:.2f}%".format(100 * true_positives / all_positives))
print("ACCURACY: {0:.2f}%".format(100 * (true_positives + true_negatives) / all_test))


________________________________________________________
SUMMARY:

loading duration: 8.6 seconds
training duration: 30788.1 seconds
testing duration: 28.8 seconds
all:  280
all_positives:  56
true_positives:  44
true_negatives:  218
false_positives:  6
false_negatives:  12
RECALL: 78.57%
ACCURACY: 93.57%


## False Positives (first 20):

In [19]:
display_images(false_positive_images[:20])

## False Negatives (first 20):

In [20]:
display_images(false_negative_images[:20])