In [1]:
from facenet_pytorch import MTCNN
import cv2
from PIL import Image
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm
import torch


from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, datasets, models
import torchvision

import torch.nn as nn
import torch.optim as optim

from utility import *

### Helper methods for results :

In [2]:
# Helper methods to get the scores per comic (useful for data separation)

def get_nb_files_per_comics(folder) :
    """
    Count the files found in each directory below the master folder.

    The tree is traversed with os.walk. A directory is ignored when the string
    "checkpoints" is one of its file names, one of its sub-directory names, or a
    substring of its own path. The first remaining entry (the master folder
    itself, as long as it was not ignored) is dropped from the result.

    : param : folder, the master folder containing all subfolders
    : return : a list containing the number of files per retained directory,
               in os.walk traversal order
    """
    counts = [
        len(filenames)
        for dirpath, dirnames, filenames in os.walk(folder)
        if not (
            "checkpoints" in filenames
            or "checkpoints" in dirnames
            or "checkpoints" in dirpath
        )
    ]
    # Skip the leading entry: os.walk yields the top directory first.
    return counts[1:]


def compute_metric_per_comic(folder, groundtruth, pred, b_pred = False):
    """
    Compute tp/fp/fn and precision/recall/f-score separately for each comic.

    The flat groundtruth / pred sequences are cut into consecutive slices whose
    lengths come from get_nb_files_per_comics(folder); each slice is scored with
    compute_metrics and prec_rec_f.

    : param : folder, the master folder containing all subfolders
    : groundtruth : the groundtruths of each image, in the same order as the folder counts
    : pred : the outputs of the model, aligned with groundtruth
    : b_pred : forwarded unchanged to compute_metrics; selects the layout of the
               predicted boxes (top left + width/height, or top left + bottom right)
    : return : one tuple per comic:
               (tp, fp, fn, precision, recall, f_score), the last three rounded to 3 decimals
    """
    per_comic = []
    start = 0
    for count in get_nb_files_per_comics(folder):
        stop = start + count
        tp_tot, fp_tot, fn_tot = compute_metrics(groundtruth[start:stop], pred[start:stop], b_pred)
        precision, recall, f_score = prec_rec_f(tp_tot, fp_tot, fn_tot)
        per_comic.append(
            (tp_tot, fp_tot, fn_tot, round(precision, 3), round(recall, 3), round(f_score, 3))
        )
        # The next comic starts where this one ended.
        start = stop

    return per_comic

def get_dict_comics(folder) :
    """
    Map each comic to the range of indices its images occupy in the flat dataset.

    : param : folder, the master folder containing all subfolders
    : return : dict {comic position: (start, end)} where end is exclusive and the
               sizes come from get_nb_files_per_comics(folder)
    """
    ranges = {}
    start = 0
    for comic_id, count in enumerate(get_nb_files_per_comics(folder)):
        ranges[comic_id] = (start, start + count)
        start += count
    return ranges


def print_exemple_comic(id_, dict_comics, nb = 100, images = None, groundtruth = None, **kwargs):
    """
    Show up to `nb` images of one comic through print_exemple.

    : param : id_, key of the comic in dict_comics
    : param : dict_comics, mapping comic id -> (start, end) image indices (end exclusive)
    : param : nb, maximum number of images to show
    : param : images, images forwarded to print_exemple; when None the
              notebook-level `images` variable is used
    : param : groundtruth, groundtruth forwarded to print_exemple; when None the
              notebook-level `groundtruth` variable is used
    : param : kwargs, extra keyword arguments forwarded to print_exemple
              (modified, pred, display)
    """
    # print_exemple requires images and groundtruth; calling it with the index
    # alone (as done previously) always raised a TypeError.
    if images is None:
        images = globals()["images"]
    if groundtruth is None:
        groundtruth = globals()["groundtruth"]

    first = dict_comics[id_][0]
    last = min(first + nb, dict_comics[id_][1])
    for i in range(first, last):
        print_exemple(i, images, groundtruth, **kwargs)

In [3]:
def print_exemple(index, images, groundtruth, modified = False, pred = None, display = True) :
    '''
    Draw the predicted and groundtruth boxes of one image and optionally show it

    :param int index: position of the image in images / groundtruth / pred
    :param images: indexable collection of images; images[index] is copied before drawing
    :param groundtruth: per-image boxes; with modified=True each entry yields
                        (column, row, width, height), otherwise each entry is a 2-D array
                        whose columns after the first are used as (x1, y1, x2, y2)
    :param bool modified: selects the groundtruth layout described above
    :param pred: optional per-image predictions given as (column, row, width, height);
                 entries that are None are skipped
    :param bool display: when True, show the result in a window and wait for a key press
    :return: the annotated copy after resize_img(..., width=680)
    '''
    canvas = images[index].copy()

    # Predictions, colour (0, 255, 0).
    boxes_pred = pred[index] if pred is not None else None
    if boxes_pred is not None:
        for (column, row, width, height) in boxes_pred:
            top_left = (column, row)
            bottom_right = (column + width, row + height)
            cv.rectangle(canvas, top_left, bottom_right, (0, 255, 0), 4)

    if modified :
        # Groundtruth stored as top-left corner + size, colour (0, 0, 255).
        for (column, row, width, height) in groundtruth[index]:
            top_left = (column, row)
            bottom_right = (column + width, row + height)
            cv.rectangle(canvas, top_left, bottom_right, (0, 0, 255), 4)
    elif groundtruth[index].size > 0 :
        # Groundtruth stored as two corners after a leading column, colour (255, 0, 0).
        for (x1, y1, x2, y2) in groundtruth[index][:, 1:]:
            cv.rectangle(canvas, (x1, y1), (x2, y2), (255, 0, 0), 4)

    canvas = resize_img(canvas, width=680)
    if display :
        cv.imshow("example", canvas)
        cv.waitKey(0)
        cv.destroyAllWindows()
    return canvas

In [13]:
# Check cuda + load model
# Use the first CUDA device when one is available, otherwise run on the CPU.

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))
# keep_all=True is expected to return every detected face rather than a single one,
# and thresholds to hold one value per MTCNN stage -- see the facenet_pytorch docs
# to confirm; how [0.5, 0.7, 0.7] compares with the library default is not shown here.
mtcnn = MTCNN(keep_all=True, device=device, thresholds = [0.5, 0.7, 0.7])

Running on device: cuda:0


In [14]:
# Dataset locations, relative to the notebook's working directory.
folder_img = "../dataset/images/images"
folder_gt = "../dataset/groundtruth"

# Load the images; the helper returns two values, bound to images and path_images.
print("Loading images from folder : ", folder_img)
images, path_images = load_images_from_folder(folder_img)
print("Check dataset length : ", len(images))

# Load groundtruth :
# NOTE(review): the meaning of the second argument (5) is defined by
# load_face_groundtruth_from_folder in utility -- confirm there.
# The printed length should match the number of images loaded above.
print("Loading groundtruth from folder : ", folder_gt)
groundtruth, path_groundtruth = load_face_groundtruth_from_folder(folder_gt, 5)
print("Check groundtruth length : ", len(groundtruth))

Loading images from folder :  ../dataset/images/images
Check dataset length :  772
Loading groundtruth from folder :  ../dataset/groundtruth
Check groundtruth length :  772


In [4]:
# NOTE(review): this cell rebinds folder_img, folder_gt, images, path_images,
# groundtruth and path_groundtruth, which the preceding cell also sets. Its
# execution count (In [4]) is lower than the preceding cell's (In [14]), so it
# appears to have been run earlier. Running the notebook top to bottom would
# make every later cell work on this "good" subset instead -- confirm which
# dataset is intended and keep only one of the two loading cells.
folder_img = "../dataset/images/good"
folder_gt = "../dataset/gt/good"

# Load the images; the helper returns two values, bound to images and path_images.
print("Loading images from folder : ", folder_img)
images, path_images = load_images_from_folder(folder_img)
print("Check dataset length : ", len(images))

# Load groundtruth :
# NOTE(review): the meaning of the second argument (5) is defined by
# load_face_groundtruth_from_folder in utility -- confirm there.
print("Loading groundtruth from folder : ", folder_gt)
groundtruth, path_groundtruth = load_face_groundtruth_from_folder(folder_gt, 5)
print("Check groundtruth length : ", len(groundtruth))

Loading images from folder :  ../dataset/images/good
Check dataset length :  162
Loading groundtruth from folder :  ../dataset/gt/good
Check groundtruth length :  162


In [15]:
# Convert for MTCNN
# Each frame is converted from BGR to RGB, then wrapped in a PIL image.
img_mtcnn = list(map(
    lambda frame: Image.fromarray(cv.cvtColor(frame, cv.COLOR_BGR2RGB)),
    images,
))

In [19]:
# Detect faces
# mtcnn.detect returns a pair; only its first element (the detected boxes) is
# kept, one entry per image, in the same order as img_mtcnn.
pred_mtcnn = [mtcnn.detect(img)[0] for img in tqdm(img_mtcnn)]

100%|██████████| 772/772 [12:41<00:00,  1.01it/s] 


In [9]:
# Total number of annotated faces over every image of the loaded groundtruth.
sum_ = sum(len(gt) for gt in groundtruth)

print("There is in total %s faces in the test set"%(sum_))

There is in total 1317 faces in the test set


In [24]:
## default
# Index range of each comic inside the flat image list.
dict_comics = get_dict_comics(folder_img)

# Score the MTCNN detections comic by comic, leaving b_pred at its default (False).
# NOTE(review): pred_mtcnn holds the raw boxes returned by mtcnn.detect -- confirm
# that their layout is the one compute_metrics expects when b_pred is False.
results = compute_metric_per_comic(folder_img, groundtruth, pred_mtcnn)
print("Id, tp, fp, fn , prec, recall, f_score")
# One line per comic: its position followed by the metrics tuple.
for i, r in enumerate(results) :
    print(i, r)

Id, tp, fp, fn , prec, recall, f_score
0 (278, 46, 201, 0.858, 0.58, 0.692)
1 (108, 56, 293, 0.659, 0.269, 0.355)
2 (117, 74, 90, 0.613, 0.565, 0.588)
3 (51, 82, 102, 0.383, 0.333, 0.256)
4 (63, 69, 163, 0.477, 0.279, 0.266)
5 (182, 123, 89, 0.597, 0.672, 0.632)
6 (18, 23, 114, 0.439, 0.136, 0.12)
7 (8, 2, 15, 0.8, 0.348, 0.485)
8 (50, 39, 65, 0.562, 0.435, 0.489)
9 (32, 84, 120, 0.276, 0.211, 0.116)
10 (60, 51, 46, 0.541, 0.566, 0.553)
11 (2, 0, 13, 1.0, 0.133, 0.235)
12 (2, 0, 13, 1.0, 0.133, 0.235)
13 (19, 15, 166, 0.559, 0.103, 0.115)
14 (61, 10, 136, 0.859, 0.31, 0.455)
15 (160, 73, 135, 0.687, 0.542, 0.606)
16 (123, 93, 137, 0.569, 0.473, 0.517)
17 (68, 19, 168, 0.782, 0.288, 0.421)
18 (153, 60, 282, 0.718, 0.352, 0.472)
19 (94, 8, 136, 0.922, 0.409, 0.566)
20 (0, 20, 1, 0.0, 0.0, 0.0)
21 (238, 101, 122, 0.702, 0.661, 0.681)
22 (0, 0, 1, 0.0, 0.0, 0.0)
23 (12, 15, 102, 0.444, 0.105, 0.094)
24 (56, 16, 167, 0.778, 0.251, 0.38)
25 (79, 9, 250, 0.898, 0.24, 0.379)
26 (69, 18, 258, 0

In [26]:
# Overall scores: all images are pooled together instead of being split per comic.
tp, fp, fn = compute_metrics(groundtruth, pred_mtcnn)
# Derive precision, recall and f-score from the pooled counts.
prec, recall, f_score = prec_rec_f(tp, fp, fn)
print("tp, fp, fn , prec, recall, f_score")
print(tp, fp, fn, prec, recall, f_score)

tp, fp, fn , prec, recall, f_score
2103 1106 3385 0.6553443440324088 0.3831997084548105 0.4836150396688513
