In [1]:
from keras.applications import VGG16
from keras.layers import GlobalAveragePooling2D, Dense, Dropout
from keras.models import Model

def vgg16_model(trainable=True):
    base_model = VGG16(False, "imagenet")
    train_from_layer = -2
    for layer in base_model.layers[:train_from_layer]:
        layer.trainable = False
        print("{} is not trainable".format(layer.name))
    for layer in base_model.layers[train_from_layer:]:
        #layer.trainable = True
        layer.trainable = False
        print("{} is trainable".format(layer.name))
    last_conv_layer = base_model.get_layer("block5_conv3")
    x = GlobalAveragePooling2D()(last_conv_layer.output)
    x = Dropout(0.5)(x)
    x = Dense(512, activation="relu")(x)        
    predictions = Dense(1, activation="sigmoid")(x)
    return Model(base_model.input, predictions)



model = vgg16_model(False)
model.summary()

Using TensorFlow backend.


input_1 is not trainable
block1_conv1 is not trainable
block1_conv2 is not trainable
block1_pool is not trainable
block2_conv1 is not trainable
block2_conv2 is not trainable
block2_pool is not trainable
block3_conv1 is not trainable
block3_conv2 is not trainable
block3_conv3 is not trainable
block3_pool is not trainable
block4_conv1 is not trainable
block4_conv2 is not trainable
block4_conv3 is not trainable
block4_pool is not trainable
block5_conv1 is not trainable
block5_conv2 is not trainable
block5_conv3 is trainable
block5_pool is trainable
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64) 

In [2]:
from Datasets import Datasets

dataset_name = "Bradbury"
dataset = Datasets.datasets()[dataset_name]

In [3]:
from keras.applications.xception import preprocess_input
import numpy as np

# Add preprocessing
train_images = dataset[0].images
train_labels  = dataset[0].labels

test_images = dataset[1].images
test_labels = dataset[1].labels

validation_images = dataset[2].images
validation_labels = dataset[2].labels

In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
import os
from ProjectPaths import ProjectPaths

In [5]:
from keras.preprocessing.image import ImageDataGenerator

data_generator = ImageDataGenerator(rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range = [0.9, 1.2],
    #brightness_range = [0.5, 1.5],
    horizontal_flip=True)

test_datagen = ImageDataGenerator()


In [6]:
batch_size = 64

train_generator = data_generator.flow(train_images, train_labels, batch_size=batch_size)
test_generator = test_datagen.flow(test_images, test_labels, batch_size=batch_size, shuffle=False)

In [None]:
from keras.callbacks import LearningRateScheduler

def schedule_lr(epoch):
    return 0.01

lrCallback = LearningRateScheduler(schedule_lr)

In [8]:
from keras.optimizers import SGD

epochs = 100
learning_rate = 0.1
decay_rate = learning_rate / epochs
momentum = 0.8
sgd = SGD(lr=learning_rate, momentum=momentum, decay=decay_rate, nesterov=False)

In [9]:
from PerformanceMetrics import PerformanceMetrics

model.compile(optimizer="adadelta", loss='binary_crossentropy',
                  metrics=['accuracy', PerformanceMetrics.precision,
                           PerformanceMetrics.recall, PerformanceMetrics.fmeasure])

In [10]:
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping




model_name = "vgg16_3t_wmp_wr_{}".format(dataset_name)

checkpoint_dir = ProjectPaths.instance().checkpoint_dir_for(model_name, batch_size, epochs)
if not os.path.exists(checkpoint_dir):
    os.mkdir(checkpoint_dir)

file_in_checkpoint_dir = ProjectPaths.instance().file_in_checkpoint_dir(model_name, batch_size,
                                                                 epochs,  model_name +
                                                                 "__{epoch:02d}_{val_acc:.2f}.hdf5")

early_stopping_callback = EarlyStopping(patience=10)
model_checkpoint_callback = ModelCheckpoint(file_in_checkpoint_dir, monitor='val_acc', verbose=True,
                                                save_weights_only=True,
                                                save_best_only=True)

log_dir = os.path.join(ProjectPaths.instance().log_dir, model_name)
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=0,  write_graph=False, write_images=False)


model.fit_generator(train_generator,
                    steps_per_epoch=len(train_labels) // batch_size, 
                    epochs=epochs,
                    callbacks=[early_stopping_callback, model_checkpoint_callback, tensorboard_callback],
                    validation_data=test_generator,
                    validation_steps=len(test_labels))

Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.83338, saving model to /media/tim/Data/Work/CBS/DeepSolaris/Models/vgg16_3t_wmp_wr_Bradbury_2018-12-05_64_100/vgg16_3t_wmp_wr_Bradbury__01_0.83.hdf5
Epoch 2/100

Epoch 00002: val_acc improved from 0.83338 to 0.83388, saving model to /media/tim/Data/Work/CBS/DeepSolaris/Models/vgg16_3t_wmp_wr_Bradbury_2018-12-05_64_100/vgg16_3t_wmp_wr_Bradbury__02_0.83.hdf5
Epoch 3/100

Epoch 00003: val_acc improved from 0.83388 to 0.84999, saving model to /media/tim/Data/Work/CBS/DeepSolaris/Models/vgg16_3t_wmp_wr_Bradbury_2018-12-05_64_100/vgg16_3t_wmp_wr_Bradbury__03_0.85.hdf5
Epoch 4/100

Epoch 00004: val_acc improved from 0.84999 to 0.85250, saving model to /media/tim/Data/Work/CBS/DeepSolaris/Models/vgg16_3t_wmp_wr_Bradbury_2018-12-05_64_100/vgg16_3t_wmp_wr_Bradbury__04_0.85.hdf5
Epoch 5/100

Epoch 00005: val_acc improved from 0.85250 to 0.85401, saving model to /media/tim/Data/Work/CBS/DeepSolaris/Models/vgg16_3t_wmp_wr_Bradbury_2018-12-05

<keras.callbacks.History at 0x7f9e381e0240>

In [11]:
import pandas as pd

train_eval = model.evaluate(train_images, train_labels, batch_size)
test_eval = model.evaluate(test_images, test_labels, batch_size)
validation_eval = model.evaluate(validation_images, validation_labels, batch_size)

np_model_evaluations = np.array([train_eval, test_eval, validation_eval])

evaluations = pd.DataFrame(np_model_evaluations, columns=model.metrics_names)
print(evaluations.head())

       loss       acc  precision    recall  fmeasure
0  0.349678  0.848439   0.778929  0.973567  0.863916
1  0.350276  0.850994   0.775621  0.983001  0.865168
2  0.367178  0.835892   0.766750  0.972160  0.855646


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_curve

def find_optimal_cutoff(target, predicted):
   """ Find the optimal probability cutoff point for a classification model related to event rate
   Parameters
   ----------
   target : Matrix with dependent or target data, where rows are observations

   predicted : Matrix with predicted data, where rows are observations

   Returns
   -------
   list type, with optimal cutoff value

   """
   fpr, tpr, threshold = roc_curve(target, predicted)
   i = np.arange(len(tpr))
   roc = pd.DataFrame({'tf' : pd.Series(tpr-(1-fpr), index=i), 'threshold' : pd.Series(threshold, index=i)})
   roc_t = roc.ix[(roc.tf-0).abs().argsort()[:1]]

   return list(roc_t['threshold'])

In [None]:
from sklearn.metrics import confusion_matrix

test_predictions = model.predict(test_images, batch_size)

In [None]:
cut_off = find_optimal_cutoff(test_labels, test_predictions)
cut_off

In [None]:
predicted_labels = test_predictions > cut_off
confusion_matrix(test_labels, predicted_labels)


In [None]:
from sklearn.metrics import classification_report

print(classification_report(test_labels, predicted_labels))

# Aachen

In [None]:
ac_dataset = Datasets.datasets()["AcMüDüHo"]

train_images = ac_dataset[0].images
train_labels = ac_dataset[0].labels

test_images = ac_dataset[1].images
test_labels = ac_dataset[1].labels

validation_images = ac_dataset[2].images
validation_labels = ac_dataset[2].labels

eval_images = np.concatenate((train_images, test_images, validation_images), axis=0)
eval_labels = np.concatenate((train_labels, test_labels, validation_labels), axis=0)


print(train_images.shape, test_images.shape, validation_images.shape)
print(eval_images.shape, eval_labels.shape)

In [None]:
eval_predictions = model.predict(eval_images, batch_size) > cut_off
confusion_matrix(eval_labels, eval_predictions)

In [None]:
from sklearn.metrics import classification_report

print(classification_report(eval_labels, eval_predictions))

# Fresno

In [None]:
fresno_dataset = Datasets.datasets()["Fresno"]

train_images = fresno_dataset[0].images
train_labels = fresno_dataset[0].labels

test_images = fresno_dataset[1].images
test_labels = fresno_dataset[1].labels

validation_images = fresno_dataset[2].images
validation_labels = fresno_dataset[2].labels

fresno_eval_images = np.concatenate((train_images, test_images, validation_images), axis=0)
fresno_eval_labels = np.concatenate((train_labels, test_labels, validation_labels), axis=0)


print(train_images.shape, test_images.shape, validation_images.shape)
print(fresno_eval_images.shape, fresno_eval_labels.shape)

In [None]:
fresno_eval_predictions = model.predict(fresno_eval_images, batch_size) > cut_off
confusion_matrix(fresno_eval_labels, fresno_eval_predictions)

In [None]:
from sklearn.metrics import classification_report

print(classification_report(fresno_eval_labels, fresno_eval_predictions))

# Heerlen Preprocessing

Now we use the trained model to preprocess the images from Heerlen and see how well it performs.

In [None]:
heerlen_dir = os.path.join(ProjectPaths.instance().image_dir, "Heerlen75x75")
image_files = [os.path.join(heerlen_dir, filename) for filename in os.listdir(heerlen_dir) if filename.endswith("rgb_2016.tiff") or filename.endswith("rgb_2017.tiff")]
image_files = sorted(image_files)
image_files[0:10]

In [None]:
len(image_files)

In [None]:
def create_dir(path):
    if os.path.exists(path):
        return
    os.makedirs(path)

In [None]:
preprocessed_dir = os.path.join(ProjectPaths.instance().image_dir, "Heerlen75x75_preprocessed")
positives_dir = os.path.join(preprocessed_dir, "Positives")
negatives_dir = os.path.join(preprocessed_dir, "Negatives")

create_dir(positives_dir)
create_dir(negatives_dir)


In [None]:
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.vgg16 import preprocess_input

def prepare_image(filename):
    img = load_img(filename) 
    img_array = img_to_array(img)
    return img_array[:,:, ::-1]

def image_generator(image_files, batch_size):
    batch = []
    for i, filename in enumerate(image_files):
        if i > 0 and (i % batch_size == 0):
            old_batch = batch
            batch = []
            yield np.array(old_batch)
        batch.append(prepare_image(filename))
    #if len(batch) > 0:
    #    repeat_last = len(batch) - batch_size
    #    repeated_images = [batch[-1] for i in range(repeat_last)]
    #   yield np.array(batch + repeated_images)

heerlen_image_generator = image_generator(image_files, batch_size)

Predict labels for the images in the source directory and write them to positive or negative directory based on the prediction

In [None]:
predictions = model.predict_generator(heerlen_image_generator, steps=len(image_files) // batch_size)
predictions

In [None]:
predictions = predictions.reshape(predictions.shape[0])
predictions.shape

In [None]:
from shutil import copyfile

for i, file_path in enumerate(image_files):        
    filename = os.path.basename(file_path)
    if i >= predictions.shape[0]:
        break
    
    prediction = predictions[i]    
    if prediction > cut_off:
        output_path = os.path.join(positives_dir, filename)
    else:        
        output_path = os.path.join(negatives_dir, filename)
    copyfile(file_path, output_path)

In [None]:
len(os.listdir(positives_dir)), len(os.listdir(negatives_dir))

In [None]:
positive_images = [load_img(os.path.join(positives_dir, image)) for i, image in enumerate(os.listdir(positives_dir)) if i < 25]

_, ax = plt.subplots(5,5, figsize=(10,10))

j = 0
for r in range(5):
    for c in range(5):
        ax[r,c].imshow(positive_images[j])     
        j += 1 


In [None]:
negative_images = [load_img(os.path.join(negatives_dir, image)) for i, image in enumerate(os.listdir(negatives_dir)) if i < 25]

_, ax = plt.subplots(5,5, figsize=(10,10))

j = 0
for r in range(5):
    for c in range(5):
        ax[r,c].imshow(negative_images[j])     
        j += 1 
