# Detecting blurry images with a CNN
This notebook contains code to classify images as sharp or blurry, based on a convolutional neural network (CNN) that was trained on a dataset containing both sharp and blurry images, as preprocessed in the PreProcessing1.ipynb notebook (from the MR4 set).

A CNN is implemented with Keras with a TensorFlow backend.

The rationale behind using a CNN is that certain filters (kernels) can be used to detect blur. By training the CNN, the network will (most likely) learn filters that work like this. An example of such a filter is the Laplacian [1].

The end of the notebook contains some examples of predicted and wrongly predicted images.

[1] https://www.pyimagesearch.com/2015/09/07/blur-detection-with-opencv/

In [1]:

# Standard library
import datetime
import glob
import math
import os
import random

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np

# To make it reproducible :-)
random.seed(21)
tf.set_random_seed(20)

# Tested on tensorflow version 1.5.0
print(tf.__version__)

%matplotlib inline
tf.logging.set_verbosity(tf.logging.ERROR)

  from ._conv import register_converters as _register_converters


1.7.0


# Settings

In [2]:
# --- Input / output dimensions ---
INPUT_DIM = 400            # input dimension of images in pixels (assumes a square image)
NUM_OF_CATEGORIES = 1     # number of units in the final Dense layer (a single sigmoid output: sharp vs. blur)

# --- Data locations ---
# INPUT_FOLDER is the root of the preprocessed data set; ORIG_FOLDER and BLUR_FOLDER are
# sub-folders of it and are expected to contain the same file names.
INPUT_FOLDER = '../../../Downloads/processed_multiblur_400p/'
ORIG_FOLDER = 'orig/'
BLUR_FOLDER = 'blur/'
# Fraction of the file names that is held out as test set.
TEST_SPLIT_FRAC = 0.2
# File name template handed to the ModelCheckpoint callback; the placeholders are
# filled in with the epoch number and the validation accuracy of that epoch.
CHECKPOINT_FILEPATH = "./blurdetector400p-{epoch:02d}-{val_acc:.2f}.h5"

# --- Training parameters (passed to model.fit) ---
FIT_BATCH_SIZE = 64
FIT_MAX_EPOCHS = 400
FIT_VALIDATION_SPLIT = 0.2

# Only needed by the (currently disabled) EarlyStopping callback.
#FIT_STOP_MIN_DELTA = 0.01
#FIT_STOP_PATIENCE = 5


# Layer stack for keras.Sequential: two Conv2D + MaxPooling2D stages followed by two
# Dense(100) + Dropout stages and a single sigmoid output unit.
# NOTE(review): the two Dense(100) layers are created without an `activation`
# argument, so they are presumably linear -- confirm whether a non-linearity
# (e.g. 'relu') was intended before training a new model from this structure.
MODEL_STRUCTURE = [
    keras.layers.Conv2D(32, kernel_size=3, activation='relu', padding='same', input_shape=(INPUT_DIM,INPUT_DIM,3)),
    keras.layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='same'),
    keras.layers.Conv2D(64, kernel_size=3, activation='relu', padding='same'),
    keras.layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='same'),
#     keras.layers.Conv2D(32, kernel_size=3, activation='relu', padding='same'),
#     keras.layers.Conv2D(32, kernel_size=3, activation='relu', padding='same'),
#     keras.layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='same'),
    keras.layers.Flatten(),
    keras.layers.Dense(100),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(100),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(NUM_OF_CATEGORIES, activation='sigmoid'),
]

# Create test/train set

In [3]:
# glob returns files in arbitrary (filesystem dependent) order. Sort them so that
# the seeded shuffle that follows produces the same train/test split on every run.
dir_content = sorted(glob.glob(INPUT_FOLDER+ORIG_FOLDER+'*.jpg'))

# Keep only the bare file name; os.path.basename handles every path separator
# the platform may use, unlike a manual split on '/'.
input_filenames = [os.path.basename(x) for x in dir_content]

# Fail early with a clear message instead of building empty data sets later on.
assert input_filenames, 'No .jpg files found in {}'.format(INPUT_FOLDER+ORIG_FOLDER)

In [4]:
# Each file name exists twice on disk: once in the blur folder and once in the
# orig folder. The split is therefore made on file names, so that both versions
# of an image always end up in the same set (either training or test).

total_items = len(input_filenames)
num_test_items = math.floor(total_items * TEST_SPLIT_FRAC)
print('Selecting {} test items from total set {}'.format(num_test_items, total_items))

# Shuffle in place, then cut the list in two: the head is the test set,
# the remainder is the training set.
random.shuffle(input_filenames)
test_items, train_items = input_filenames[:num_test_items], input_filenames[num_test_items:]



Selecting 224 test items from total set 1123


# Loading train & test images into memory

In [5]:
def load_images(items, orig_folder, blur_folder):
    """Load each sharp/blurred image pair and return them as one shuffled data set.

    Every file name in ``items`` is read twice: once from ``orig_folder``
    (label 1, sharp) and once from ``blur_folder`` (label 0, blurred).
    Images are returned exactly as ``cv.imread`` yields them (OpenCV loads
    colour images in BGR channel order).

    Args:
        items: iterable of image file names that exist in both folders.
        orig_folder: path prefix of the sharp images; it is concatenated
            directly with the file name, so it must end with a separator.
        blur_folder: path prefix of the blurred images (same convention).

    Returns:
        A tuple ``(x_set, y)`` in which ``x_set`` is all images stacked along
        a new first axis and ``y`` is the 1-D array of matching labels
        (1 = sharp, 0 = blurred), both in the same random order.

    Raises:
        IOError: if an image file cannot be read.
        ValueError: if ``items`` is empty, or if the images do not all have
            the same shape (raised by ``np.stack``).
    """
    samples = []

    for file in items:
        # original sample = positive = 1, blurred sample = negative = 0
        for folder, label in ((orig_folder, 1), (blur_folder, 0)):
            path = folder + file
            image = cv.imread(path)
            # cv.imread does not raise on failure, it returns None. Stop here
            # with the offending path instead of failing later in np.stack.
            if image is None:
                raise IOError('Could not read image: {}'.format(path))
            samples.append((image, label))

    if not samples:
        raise ValueError('load_images() needs at least one file name')

    # Randomize the set (otherwise it will always be 10101010101010)
    random.shuffle(samples)
    x, y = zip(*samples)

    return np.stack(x, axis=0), np.asarray(y)


# Both image sub-folders live directly under INPUT_FOLDER.
orig_folder, blur_folder = INPUT_FOLDER + ORIG_FOLDER, INPUT_FOLDER + BLUR_FOLDER

# Test set first, training set second. The order is kept on purpose: every
# load_images call shuffles its samples and thereby advances the random state.
x_test, y_test = load_images(test_items, orig_folder, blur_folder)
x_train, y_train = load_images(train_items, orig_folder, blur_folder)

In [6]:
# Number of samples per set (each file name contributes a sharp and a blurred image).
print("Size of training set: {}".format(y_train.shape[0]))
print("Size of test set: {}".format(y_test.shape[0]))

Size of training set: 1798
Size of test set: 448


# Compile model

In [7]:
# Continue from a previously trained checkpoint instead of building a new network.
# To start from scratch, build the model from MODEL_STRUCTURE (line below) and
# compile it before calling fit.
#model = keras.Sequential(MODEL_STRUCTURE)
model = keras.models.load_model("secondrun_blurdetector400p-161-0.76.h5")

# summary() prints the table itself and returns None, so wrapping it in print()
# would only add a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 400, 400, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 200, 200, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 200, 200, 64)      18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 100, 100, 64)      0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 640000)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               64000100  
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
__________

In [8]:
# Compilation is disabled: the model trained below is the one restored with
# keras.models.load_model, which presumably brings its optimizer and loss along
# with it (TODO confirm for the checkpoint in use). Re-enable these lines when
# the model is built from MODEL_STRUCTURE instead.
#model.compile(optimizer=keras.optimizers.Adam(lr=1e-5), 
#              loss='binary_crossentropy',
#              metrics=['accuracy'])

# Train the model

In [9]:
# Write a checkpoint whenever the validation accuracy reaches a new maximum.
# Early stopping is not used; training runs for FIT_MAX_EPOCHS epochs unless
# it is interrupted by hand.
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    CHECKPOINT_FILEPATH,
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max')

history = model.fit(
    x_train,
    y_train,
    batch_size=FIT_BATCH_SIZE,
    epochs=FIT_MAX_EPOCHS,
    callbacks=[checkpoint_callback],
    validation_split=FIT_VALIDATION_SPLIT)

Train on 1438 samples, validate on 360 samples
Epoch 1/400

Epoch 00001: val_acc improved from -inf to 0.71389, saving model to ./blurdetector400p-01-0.71.h5
Epoch 2/400

Epoch 00002: val_acc did not improve
Epoch 3/400

Epoch 00003: val_acc improved from 0.71389 to 0.76389, saving model to ./blurdetector400p-03-0.76.h5
Epoch 4/400

Epoch 00004: val_acc did not improve
Epoch 5/400

Epoch 00005: val_acc did not improve
Epoch 6/400

Epoch 00006: val_acc did not improve
Epoch 7/400

Epoch 00007: val_acc did not improve
Epoch 8/400

Epoch 00008: val_acc did not improve
Epoch 9/400

Epoch 00009: val_acc did not improve
Epoch 10/400

Epoch 00010: val_acc did not improve
Epoch 11/400

Epoch 00011: val_acc did not improve
Epoch 12/400

Epoch 00012: val_acc did not improve
Epoch 13/400

Epoch 00013: val_acc did not improve
Epoch 14/400

Epoch 00014: val_acc did not improve
Epoch 15/400

Epoch 00015: val_acc did not improve
Epoch 16/400

Epoch 00016: val_acc did not improve
Epoch 17/400

Epoch 0


Epoch 00046: val_acc did not improve
Epoch 47/400

Epoch 00047: val_acc did not improve
Epoch 48/400

Epoch 00048: val_acc did not improve
Epoch 49/400

Epoch 00049: val_acc did not improve
Epoch 50/400

Epoch 00050: val_acc did not improve
Epoch 51/400

Epoch 00051: val_acc did not improve
Epoch 52/400

Epoch 00052: val_acc did not improve
Epoch 53/400

Epoch 00053: val_acc did not improve
Epoch 54/400

Epoch 00054: val_acc did not improve
Epoch 55/400

Epoch 00055: val_acc did not improve
Epoch 56/400

Epoch 00056: val_acc did not improve
Epoch 57/400

Epoch 00057: val_acc did not improve
Epoch 58/400

Epoch 00058: val_acc improved from 0.77222 to 0.77778, saving model to ./blurdetector400p-58-0.78.h5
Epoch 59/400

Epoch 00059: val_acc did not improve
Epoch 60/400

Epoch 00060: val_acc did not improve
Epoch 61/400

Epoch 00061: val_acc improved from 0.77778 to 0.78333, saving model to ./blurdetector400p-61-0.78.h5
Epoch 62/400

Epoch 00062: val_acc did not improve
Epoch 63/400

Epoc


Epoch 00090: val_acc did not improve
Epoch 91/400

Epoch 00091: val_acc did not improve
Epoch 92/400

Epoch 00092: val_acc did not improve
Epoch 93/400

Epoch 00093: val_acc did not improve
Epoch 94/400

Epoch 00094: val_acc did not improve
Epoch 95/400

Epoch 00095: val_acc did not improve
Epoch 96/400

Epoch 00096: val_acc did not improve
Epoch 97/400

Epoch 00097: val_acc did not improve
Epoch 98/400

Epoch 00098: val_acc did not improve
Epoch 99/400

Epoch 00099: val_acc did not improve
Epoch 100/400

Epoch 00100: val_acc did not improve
Epoch 101/400

Epoch 00101: val_acc did not improve
Epoch 102/400

Epoch 00102: val_acc improved from 0.82500 to 0.83056, saving model to ./blurdetector400p-102-0.83.h5
Epoch 103/400

Epoch 00103: val_acc did not improve
Epoch 104/400

Epoch 00104: val_acc did not improve
Epoch 105/400

Epoch 00105: val_acc did not improve
Epoch 106/400

Epoch 00106: val_acc did not improve
Epoch 107/400

Epoch 00107: val_acc improved from 0.83056 to 0.83333, savi


Epoch 00136: val_acc did not improve
Epoch 137/400

Epoch 00137: val_acc did not improve
Epoch 138/400

Epoch 00138: val_acc did not improve
Epoch 139/400

Epoch 00139: val_acc did not improve
Epoch 140/400

Epoch 00140: val_acc did not improve
Epoch 141/400

Epoch 00141: val_acc did not improve
Epoch 142/400

Epoch 00142: val_acc did not improve
Epoch 143/400

Epoch 00143: val_acc did not improve
Epoch 144/400

Epoch 00144: val_acc did not improve
Epoch 145/400

Epoch 00145: val_acc improved from 0.84722 to 0.85000, saving model to ./blurdetector400p-145-0.85.h5
Epoch 146/400

Epoch 00146: val_acc did not improve
Epoch 147/400

Epoch 00147: val_acc did not improve
Epoch 148/400

Epoch 00148: val_acc did not improve
Epoch 149/400

Epoch 00149: val_acc did not improve
Epoch 150/400

Epoch 00150: val_acc did not improve
Epoch 151/400

Epoch 00151: val_acc did not improve
Epoch 152/400

Epoch 00152: val_acc did not improve
Epoch 153/400

Epoch 00153: val_acc did not improve
Epoch 154/400


Epoch 00181: val_acc did not improve
Epoch 182/400

Epoch 00182: val_acc did not improve
Epoch 183/400

Epoch 00183: val_acc did not improve
Epoch 184/400

Epoch 00184: val_acc did not improve
Epoch 185/400

Epoch 00185: val_acc did not improve
Epoch 186/400

Epoch 00186: val_acc did not improve
Epoch 187/400

Epoch 00187: val_acc did not improve
Epoch 188/400

Epoch 00188: val_acc did not improve
Epoch 189/400

Epoch 00189: val_acc did not improve
Epoch 190/400

Epoch 00190: val_acc did not improve
Epoch 191/400

Epoch 00191: val_acc did not improve
Epoch 192/400

Epoch 00192: val_acc did not improve
Epoch 193/400

Epoch 00193: val_acc did not improve
Epoch 194/400

Epoch 00194: val_acc did not improve
Epoch 195/400

Epoch 00195: val_acc did not improve
Epoch 196/400

Epoch 00196: val_acc did not improve
Epoch 197/400

Epoch 00197: val_acc did not improve
Epoch 198/400

Epoch 00198: val_acc did not improve
Epoch 199/400

Epoch 00199: val_acc did not improve
Epoch 200/400

Epoch 00200

KeyboardInterrupt: 

In [10]:
# Optional (disabled): drop the in-memory model and continue with a specific
# saved checkpoint instead, e.g. when training was interrupted.
# NOTE(review): the file name below looks like one written by the checkpoint
# callback during an earlier run -- adjust it to a file that actually exists.
#del model
#model = keras.models.load_model('./blurdetector400p-171-0.86.h5')

In [None]:
# list all data in history
print(history.history.keys())

# Plot the training curve against the validation curve, once for accuracy and
# once for loss. The second curve comes from the validation split used during
# fit -- not from the held-out test set -- so it is labelled 'validation'.
for metric, description in (('acc', 'accuracy'), ('loss', 'loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + description)
    plt.ylabel(description)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

# Test the model

In [11]:
# Run the model prediction
y_tested = model.predict(x_test)

# Since the model has a sigmoid function in the last dense layer, outputs are between 0-1. Squash into binary...
decision_boundary = 0.5
y_tested[ y_tested > decision_boundary ] = 1
y_tested[ y_tested <= decision_boundary ] = 0

In [12]:
# Element-wise comparison of the flattened predictions with the true labels.
correctly_predicted = y_tested.reshape(-1) == y_test

num_total = correctly_predicted.size
num_correct = int(np.count_nonzero(correctly_predicted))
num_wrong = num_total - num_correct

# Guard against an empty test set instead of dividing by zero.
accuracy = num_correct / num_total if num_total else float('nan')

# Let the format spec do the rounding: round(x, 4) * 100 can print float
# artefacts such as 55.809999999999995.
print("Accuracy on test set: {:.2f}%".format(accuracy * 100))
print("Number of images wrongly predicted: {}".format(num_wrong))

Accuracy on test set: 88.39%
Number of images wrongly predicted: 52


# Save the model

In [None]:
# Optional (disabled): save the current in-memory model under a file name that
# carries the current date and time (YYYYMMDD-HHMM).
# datestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M')
# model.save('blurdetector_partialblurrectangular_{}.h5'.format(datestamp))

# Inspect the output

### Show random images with truth/prediction from test set

In [None]:
show = 5                          # number of random test images to display
labels = {0:'blur', 1:'sharp'}    # class index -> readable name

# Flat view of the predictions, so one index yields one scalar label.
predicted_labels = y_tested.reshape(-1)

for i in range(show):

    # randrange excludes its upper bound, so the index is always valid.
    # (random.randint includes it and could return len(y_test).)
    random_draw = random.randrange(len(y_test))

    img = x_test[random_draw]
    truth = int(y_test[random_draw])
    predicted = int(predicted_labels[random_draw])

    plt.title('Truth: {t}, Predicted: {p}'.format(t=labels[truth], p=labels[predicted]))
    # The images were read with OpenCV (BGR channel order) while matplotlib
    # expects RGB; convert for display only.
    plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
    plt.show()

### Show incorrectly predicted images

In [None]:
# Find the wrongly predicted ones: compare the flattened predictions with the
# true labels in one vectorised step and keep the indices that differ.
predicted_labels = y_tested.reshape(-1)
wrong_indices = np.flatnonzero(predicted_labels != y_test)

wrongs = [
    {
        'image': x_test[i],
        'truth': int(y_test[i]),
        'pred': int(predicted_labels[i]),
    }
    for i in wrong_indices
]
print('Found wrongs: {}'.format(len(wrongs)))

# Plot the wrong predicted ones (at most the first 20).
# `labels` is the index -> name mapping defined in the previous cell.
for wrong in wrongs[:20]:
    plt.title('Truth: {t}, Predicted: {p}'.format(t=labels[wrong['truth']], p=labels[wrong['pred']]))
    # The images were read with OpenCV (BGR channel order) while matplotlib
    # expects RGB; convert for display only.
    plt.imshow(cv.cvtColor(wrong['image'], cv.COLOR_BGR2RGB))
    plt.show()