In [23]:
import os

import numpy as np
import random

import cv2
from matplotlib import pyplot as plt
from keras.models import load_model

# Root directories of the extracted frames, one for training and one for validation.
# NOTE(review): these are absolute, machine-specific mount points — adjust them (or make
# them configurable) before running the notebook on another machine.
training_image_src = '/mnt/disks/a/frames'
validation_image_src = '/mnt/disks/b/frames'

# The bare string below documents the directory layout and file-naming convention that
# the label-parsing helpers rely on. It is an expression statement with no runtime effect.
"""
The directory is divided into folders by the candidate number
Within each candidate's folder, the frames are further divided by the label
Naming convention of the frames is as follows: [candidate number]_[frame_number]_[label]
Single digit candidate numbers are padded with a 0
Frame numbers are consecutive and not padded
Label can be 0, 5 or 10
"""

# Load one frame from disk and return its normalised pixels together with its class label.
def extract_data_and_label(image_path):
    """Read a frame as a normalised grayscale array and parse its label from the file name.

    Args:
        image_path: Path to a frame named ``[candidate]_[frame]_[label]`` plus an
            extension, where the raw label is 0, 5 or 10.

    Returns:
        ``(image, label)`` where ``image`` is a ``(100, 100)`` float array scaled to
        ``[0, 1]`` and ``label`` is 0, 1 or 2 (raw label 0 -> 0, 5 -> 1, anything
        else -> 2). Returns ``(None, None)`` when the file cannot be decoded.

    Raises:
        IndexError: If the file name has fewer than three underscore-separated fields.
        ValueError: If the label field is not an integer.
    """
    # OpenCV returns a 2-D NumPy array of pixels for grayscale reads, which is what Keras
    # consumes. The flag is taken from the public ``cv2`` namespace: the ``cv2.cv2``
    # alias is not available in newer opencv-python releases.
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # Some of the frames are corrupt; imread signals that by returning None.
    if image is None or image.size == 0:
        return None, None

    # Scale to a fixed (arbitrarily chosen) 100x100 input size, then map pixel values
    # from [0, 255] to [0, 1]. Extra arguments to resize can fine-tune the interpolation.
    image = cv2.resize(image, (100, 100))
    image = image / 255

    # Open question: should only the faces be extracted? A CNN should be able to pick
    # out the key features on its own, but cropping (possible with OpenCV) might make
    # it more effective.

    # The label is the third underscore-separated field of the file name. Parse the
    # basename of *this* path (not a leftover global loop variable) so that underscores
    # in parent directories cannot shift the fields.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    raw_label = int(stem.split('_')[2])
    # Convert the raw labels 0 / 5 / 10 to the class indices 0 / 1 / 2.
    label = 0 if raw_label == 0 else 1 if raw_label == 5 else 2

    return image, label

def extract_data(image_path):
    """Read a frame as a normalised 100x100 grayscale array.

    Args:
        image_path: Path to the image file.

    Returns:
        A ``(100, 100)`` float array with values scaled to ``[0, 1]``, or ``None``
        when the file cannot be decoded. (A single ``None`` rather than a
        ``(None, None)`` pair, so the failure value matches the one-value success
        return and callers can test it with ``is None``.)
    """
    # Use the flag from the public ``cv2`` namespace; the ``cv2.cv2`` alias is not
    # available in newer opencv-python releases.
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # Some of the frames are corrupt; imread signals that by returning None.
    if image is None or image.size == 0:
        return None

    # Scale to a fixed (arbitrarily chosen) 100x100 input size, then map pixel values
    # from [0, 255] to [0, 1].
    image = cv2.resize(image, (100, 100))
    return image / 255

def extract_label(image_path):
    """Return the binary class encoded in a frame's file name.

    Frames are named ``[candidate]_[frame]_[label]`` plus an extension; the label is
    the third underscore-separated field.

    Args:
        image_path: File name or full path of the frame.

    Returns:
        0 when the raw label is 0, otherwise 1.

    Raises:
        IndexError: If the file name has fewer than three underscore-separated fields.
        ValueError: If the label field is not an integer.
    """
    # Parse only the file name: splitting the whole path would pick the wrong field
    # as soon as any parent directory contains an underscore.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    raw_label = int(stem.split('_')[2])
    # Collapse every non-zero raw label into the positive class.
    return 0 if raw_label == 0 else 1

# Pick a random sample of frames, score them with the trained CNN and compute its ROC curve.
SAMPLE_SIZE = 100   # upper bound on the number of frames to evaluate
IMAGE_SIDE = 100    # frames are resized to IMAGE_SIDE x IMAGE_SIDE by extract_data

# Collect every frame under the validation directory.
# (Walk training_image_src instead to evaluate on the training frames.)
test_files = []
for root, dirs, files in os.walk(validation_image_src):
    for file in files:
        file_path = os.path.join(root, file)
        test_files.append(file_path)

# Binary evaluation: drop the frames whose raw label is 5.
test_files = list(filter(lambda x: '_5.jpg' not in x, test_files))

# Never ask random.sample for more items than exist (it raises ValueError otherwise).
# NOTE: the sample is not seeded, so every run evaluates a different set of frames.
test_files = random.sample(test_files, min(SAMPLE_SIZE, len(test_files)))

# Load the frames and drop the ones that could not be decoded. Only real arrays are
# kept, whatever placeholder extract_data returns for a corrupt file, and the file
# list is filtered in the same pass so that labels stay aligned with the data.
loaded = [(path, extract_data(path)) for path in test_files]
readable = [(path, image) for path, image in loaded if isinstance(image, np.ndarray)]
if not readable:
    raise RuntimeError('No readable frames found under ' + validation_image_src)

test_files = [path for path, _ in readable]
# Add the trailing channel axis expected by the network; -1 lets the batch dimension
# follow the number of frames that were actually loaded.
test_data = np.array([image for _, image in readable])
test_data = test_data.reshape(-1, IMAGE_SIDE, IMAGE_SIDE, 1)

test_labels = list(map(extract_label, test_files))
print(test_labels)


# Load and summarize model
model = load_model('cnn.h5')
model.summary()

pred = model.predict(test_data, batch_size=SAMPLE_SIZE, verbose=1)
print('Prediction completed!')
from sklearn.metrics import roc_curve
from sklearn import metrics

# Column 1 of the prediction holds the score of the positive class (label 1).
fpr, tpr, thresholds = roc_curve(test_labels, pred[:, 1])

print(fpr)
print(tpr)

# # Iterate through our random sample and test for each one
# for file in test_files:
#     # First display the image
#     img = cv2.imread(file)
#     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#     plt.imshow(np.abs(img), cmap = 'gray')
#     plt.xticks([]), plt.yticks([])
#     plt.title(file)
#     plt.show()
    
#     # Extract the data
#     data, label = extract_data_and_label(file)
#     if data is None and label is None:
#         continue
    
#     data = data.reshape(1, 100, 100, 1)
#     result = model.predict(data, batch_size=1, verbose=1)
    
#     print(result)
#     result = np.argmax(result[0])
#     if result == 0:
#         print('Model predicited this image to have a label of 0')
#     elif result == 1:
#         print('Model predicited this image to have a label of 10')
#     else:
#         print('Something went wrong...')
    

[0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0]
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 98, 98, 64)        640       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 49, 49, 64)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 47, 47, 32)        18464     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 23, 23, 32)        0         
_________________________________________________________________
flatten_2 (Flatte

In [24]:
# Keep the CNN's ROC curve under model-specific names.
# NOTE(review): presumably so it can be compared with other models' curves in another
# notebook — confirm against the notebook that restores these values.
fpr_cnn = fpr
tpr_cnn = tpr

# Persist both arrays with IPython's %store magic so they survive a kernel restart;
# they can be restored with `%store -r`.
%store fpr_cnn
%store tpr_cnn

Stored 'fpr_cnn' (ndarray)
Stored 'tpr_cnn' (ndarray)
