In [1]:
import cv2
import tensorflow as tf
import numpy as np
import os


In [2]:

class UltraLightFaceDetecion():
    """Face detector that runs a TFLite model and decodes its raw outputs.

    The input image is resized to 320x240, converted BGR -> RGB and min-max
    scaled to [-1, 1]. The first two model outputs are read as box regressions
    and per-anchor scores; the regressions are decoded against the anchors
    stored in ``./anchors_wh.npy`` and ``./anchors_xy.npy`` (relative to the
    current working directory), filtered by confidence and reduced with
    non-maximum suppression.

    Args:
        filepath: path of the ``.tflite`` model file.
        conf_threshold: anchors whose score is not above this value are dropped.
        nms_iou_threshold: IoU threshold passed to non-maximum suppression.
        nms_max_output_size: maximum number of boxes kept by NMS.
    """

    # Anchor files, resolved against the current working directory.
    _ANCHORS_WH_PATH = './anchors_wh.npy'
    _ANCHORS_XY_PATH = './anchors_xy.npy'

    # Class-level defaults so the helpers also work on an instance whose
    # __init__ was not run (e.g. in unit tests).
    _anchors = None
    _conf_threshold = 0.6
    _nms_iou_threshold = 0.3
    _nms_max_output_size = 200

    def __init__(self, filepath, conf_threshold=0.6,
                 nms_iou_threshold=0.3, nms_max_output_size=200):
        # tflite model init
        self._interpreter = tf.lite.Interpreter(model_path=filepath)
        self._interpreter.allocate_tensors()

        self._conf_threshold = conf_threshold
        self._nms_iou_threshold = nms_iou_threshold
        self._nms_max_output_size = nms_max_output_size

    def _pre_processing(self, img):
        """Turn a BGR image into the (1, 240, 320, 3) float32 model input."""
        # resize image to (240,320,3)
        resized = cv2.resize(img, dsize=(320, 240))
        # bgr to rgb
        image_rgb = resized[..., ::-1]
        # converting values to float type
        image_norm = image_rgb.astype(np.float32)
        # min-max normalisation: the image minimum maps to -1, the maximum to 1
        cv2.normalize(image_norm, image_norm,
                      alpha=-1, beta=1, norm_type=cv2.NORM_MINMAX)
        # adding the batch dimension (1, 240, 320, 3)
        return image_norm[None, ...]

    @staticmethod
    def _flatten_batch(tensor):
        """Collapse every leading axis so ``tensor`` is 2-D (rows, last_dim).

        A 2-D input is returned with the same shape; an input with a leading
        batch axis of size 1 loses that axis.
        """
        return tensor.reshape(-1, tensor.shape[-1])

    def inference(self, img):
        """Detect faces in a BGR image.

        Args:
            img: image array as returned by ``cv2.imread``.

        Returns:
            ``(boxes, scores)``: ``boxes`` has one row per detection with the
            corner coordinates scaled by the image width/height, ``scores``
            holds the matching confidences.

        Raises:
            ValueError: if ``img`` is None.
        """
        if img is None:
            raise ValueError("inference() received None instead of an image")

        # BGR image to tensor
        input_tensor = self._pre_processing(img)

        # get input details, set tensor and invoke
        input_details = self._interpreter.get_input_details()
        self._interpreter.set_tensor(input_details[0]["index"], input_tensor)
        self._interpreter.invoke()

        # read the raw results
        output_details = self._interpreter.get_output_details()
        boxes = self._interpreter.get_tensor(output_details[0]["index"])
        scores = self._interpreter.get_tensor(output_details[1]["index"])

        # The decoding below indexes rows as anchors; drop a leading batch
        # axis if the model emits one (no-op for already 2-D outputs).
        boxes = self._flatten_batch(boxes)
        scores = self._flatten_batch(scores)

        # decode boxes to corner format
        boxes, scores = self._post_processing(boxes, scores)
        # scale the normalised corners by (width, height, width, height)
        boxes *= np.tile(img.shape[1::-1], 2)
        return boxes, scores

    def _post_processing(self, boxes, scores):
        """Decode regressions, apply the confidence filter, then NMS."""
        # bounding box regression
        boxes = self._decode_regression(boxes)
        # confidence for every anchor (second score column)
        scores = scores[:, 1]

        # keep only anchors whose confidence exceeds the threshold
        conf_mask = self._conf_threshold < scores
        boxes, scores = boxes[conf_mask], scores[conf_mask]

        # non-maximum suppression: prunes boxes that have a high
        # intersection-over-union (IOU) overlap with previously selected boxes
        nms_mask = tf.image.non_max_suppression(
            boxes, scores,
            max_output_size=self._nms_max_output_size,
            iou_threshold=self._nms_iou_threshold,
            score_threshold=float('-inf'), name=None
        )
        # nms_mask contains the indexes of the selected boxes
        boxes = np.take(boxes, nms_mask, axis=0)
        scores = np.take(scores, nms_mask, axis=0)
        return boxes, scores

    def _load_anchors(self):
        """Return ``(anchors_wh, anchors_xy)``, reading the files only once."""
        if self._anchors is None:
            with open(self._ANCHORS_WH_PATH, 'rb') as f:
                anchors_wh = np.load(f)
            with open(self._ANCHORS_XY_PATH, 'rb') as f:
                anchors_xy = np.load(f)
            # cache on the instance so later inferences skip the disk reads
            self._anchors = (anchors_wh, anchors_xy)
        return self._anchors

    def _decode_regression(self, reg):
        """Convert per-anchor regressions into clipped corner boxes.

        Args:
            reg: 2-D array; columns 0-1 are centre offsets, columns 2-3 are
                log-size offsets, one row per anchor.

        Returns:
            Array of ``(start_x, start_y, end_x, end_y)`` rows clipped to
            [0, 1].
        """
        center_variance = 0.1
        size_variance = 0.2

        # predefined anchors (cached after the first call)
        anchors_wh, anchors_xy = self._load_anchors()

        # centre = offset * variance * anchor size + anchor centre
        center_xy = reg[:, :2] * center_variance * \
            anchors_wh + anchors_xy
        # half extent = exp(offset * variance) * anchor size / 2
        center_wh = np.exp(
            reg[:, 2:] * size_variance) * anchors_wh / 2

        # center to corner for every anchor
        start_xy = center_xy - center_wh
        end_xy = center_xy + center_wh
        # concatenation of box coordinates
        boxes = np.concatenate((start_xy, end_xy), axis=-1)
        # clip values of boxes between min 0 max 1
        boxes = np.clip(boxes, 0.0, 1.0)
        return boxes


In [3]:
def variance_of_laplacian(image,th=100):
    """Blur check: is the variance of the Laplacian of ``image`` above ``th``?

    Intended to be called on a cropped face (BGR).

    Args:
        image: BGR image array (typically a face crop).
        th: focus-measure threshold; values at or below it count as blurry.

    Returns:
        True when the focus measure exceeds ``th``, False otherwise. An empty
        or missing crop returns False.
    """
    # A zero-area crop has no pixels to measure and would make cv2.cvtColor
    # raise; report it as failing the quality check instead.
    if image is None or image.size == 0:
        return False

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    fm = cv2.Laplacian(gray, cv2.CV_64F).var()
    # printed so the threshold can be tuned by eye
    print(fm)
    return not fm <= th

In [4]:
def ladnmarks(rec,h,w):
    """Grow a face box, make it square and clamp it to the image.

    Args:
        rec: box indexed as (left, top, right, bottom).
        h: image height.
        w: image width.

    Returns:
        Tuple ``(left, top, right, bottom)`` of the enlarged box, limited to
        ``[0, w-1]`` horizontally and ``[0, h-1]`` vertically.
    """
    x_min, y_min, x_max, y_max = rec[0], rec[1], rec[2], rec[3]
    box_h = y_max - y_min
    box_w = x_max - x_min

    # enlarge: 80% of the height upwards, 30% on the three other sides
    top = int(y_min - 0.8 * box_h)
    bottom = int(y_max + 0.3 * box_h)
    left = int(x_min - 0.3 * box_w)
    right = int(x_max + 0.3 * box_w)

    # pad the shorter side symmetrically so the region becomes square
    height, width = bottom - top, right - left
    if height > width:
        left -= (height - width) // 2
        right = left + height
    else:
        top -= (width - height) // 2
        bottom = top + width

    # keep the region inside the image
    return (max(0, left), max(0, top), min(right, w-1), min(bottom, h-1))


In [5]:
def overlap(rec1, rec2,h,w):
    """Tell whether the expanded regions of two face boxes intersect.

    Both boxes are first passed through ``ladnmarks`` and the resulting
    rectangles are compared.

    Args:
        rec1: first box, indexed as (left, top, right, bottom).
        rec2: second box, same layout.
        h: image height.
        w: image width.

    Returns:
        True when the two expanded rectangles share interior area.
    """
    left1, top1, right1, bottom1 = ladnmarks(rec1,h,w)
    left2, top2, right2, bottom2 = ladnmarks(rec2,h,w)

    # Two intervals intersect when each one starts before the other ends.
    # Unlike testing whether an edge of rec2 lies inside rec1, this also
    # catches identical rectangles and one rectangle enclosing the other.
    x_match = left1 < right2 and left2 < right1
    y_match = top1 < bottom2 and top2 < bottom1
    return bool(x_match and y_match)

In [6]:
def checkHuman(boxes,scores,th=.99):
    """Keep only the detections whose score is not below ``th``.

    The inputs are left untouched; any pair of equally ordered sequences
    (lists or arrays) is accepted.

    Args:
        boxes: sequence of boxes, one per detection.
        scores: sequence of confidences aligned with ``boxes``.
        th: minimum score a detection needs to be kept.

    Returns:
        ``(boxes, scores)`` as numpy arrays containing the kept detections in
        their original order.
    """
    # `not score < th` mirrors the removal rule (drop when score < th)
    kept = [(box, score) for box, score in zip(boxes, scores) if not score < th]
    kept_boxes = [box for box, _ in kept]
    kept_scores = [score for _, score in kept]
    return np.array(kept_boxes),np.array(kept_scores)
    

In [7]:
def image_inference(img_path, color=(125, 255, 0)):
    """Run the acceptance checks on one image and save an annotated copy.

    The image is read, faces are detected with the module-level detector
    ``fd``, low-confidence detections are dropped, and a status is chosen:
    no face, more than two faces, two intersecting faces, or (for a single
    face) a blur check on the face crop. The status text is drawn on the
    image, which is written to ``./data/output/`` under its original name.

    Args:
        img_path: path of the image to analyse.
        color: currently unused; kept for interface compatibility.

    Returns:
        ``(boxes, scores)`` of the detections that passed the confidence
        filter.

    Raises:
        ValueError: if the image cannot be read.
    """
    # read image; cv2.imread signals failure by returning None
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")

    status = "Accepted"
    # make inference
    boxes, scores = fd.inference(img)
    # TODO :: check the threshold
    # human face
    boxes,scores = checkHuman(list(boxes),list(scores),th=.99)
    # No Face
    if len(boxes) == 0:
        status = "Rejected:No Face"
    # Multiple Boxes
    elif len(boxes) > 2:
        status = "Rejected: Multple Face"
    elif len(boxes) == 2:
        if overlap(boxes[0], boxes[1],img.shape[0],img.shape[1]):
            status = "Rejected: Intersecting Faces"
    else:
        # single face: every crop must pass the blur check
        # TODO :: check the threshold
        sharp = all(
            variance_of_laplacian(img[box[1]:box[3],box[0]:box[2]])
            for box in boxes.astype(int)
        )
        if not sharp:
            status = "Rejected: Bad Quality"
    # TODO: Reject small face (25%)
    # saving the image
    cv2.putText(img,status,(0,img.shape[0]-5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (209, 255, 0, 255), 1)
    # cv2.imwrite does not create missing directories
    os.makedirs("./data/output/", exist_ok=True)
    cv2.imwrite(os.path.join("./data/output/", os.path.basename(img_path)),img)
    return boxes, scores

In [8]:
# All checks
# Face not found
# human face
# Multiple faces
# no intersection (accept)
# intersection (reject)
# Small face
# Blurry

In [9]:
# Model variant: 'RFB' (higher precision) or 'slim' (faster)
mode = 'RFB'
# the variant name is substituted into the pretrained model file name
filepath = "pretrained/version-{}-320_without_postprocessing.tflite".format(mode)
# detector instance used by image_inference
fd = UltraLightFaceDetecion(filepath)

In [10]:
# single image inference
img_path = './data/test/test_2021-12-18T06_46_35.891210.jpg'
result = image_inference(img_path)
# Unpacking a None return would raise TypeError, so only unpack when the
# call hands back a (boxes, scores) pair.
boxes, scores = result if result is not None else (None, None)

In [None]:
# directory inference
test_dir = "./data/test"
# sorted: os.listdir returns entries in arbitrary order
for file_name in sorted(os.listdir(test_dir)):
    img_path = f'{test_dir}/{file_name}'
    # skip sub-directories and other non-file entries; they are not images
    if not os.path.isfile(img_path):
        continue
    print(file_name)
    image_inference(img_path)
