In [5]:
import cv2
from functools import partial
import cv2
import tensorflow as tf
import numpy as np
import os


In [6]:

class UltraLightFaceDetecion():
    """Face detector built around a TFLite interpreter.

    The model's first output is treated as per-anchor box regression and its
    second output as per-anchor class scores; decoding against the anchor
    tables, confidence filtering and non-maximum suppression are done here.

    Args:
        filepath: path of the ``.tflite`` model file.
        conf_threshold: detections whose score is not strictly greater than
            this value are discarded (default 0.6).
        iou_threshold: IoU above which a box is suppressed by a higher
            scoring one during NMS (default 0.3).
        max_detections: upper bound on the number of boxes kept by NMS
            (default 200).
    """

    def __init__(self, filepath, conf_threshold=0.6, iou_threshold=0.3,
                 max_detections=200):
        # tflite model init
        self._interpreter = tf.lite.Interpreter(model_path=filepath)
        self._interpreter.allocate_tensors()

        # post-processing settings
        self._conf_threshold = conf_threshold
        self._iou_threshold = iou_threshold
        self._max_detections = max_detections

        # (anchors_wh, anchors_xy), read from disk on first use and reused
        # afterwards so the files are not re-read on every inference
        self._anchors = None

    def _pre_processing(self, img):
        """Turn a BGR image into a float32 tensor of shape (1, 240, 320, 3)."""
        # resize image to (240,320,3); cv2 takes dsize as (width, height)
        resized = cv2.resize(img, dsize=(320, 240))
        # bgr to rgb
        image_rgb = resized[..., ::-1]
        # converting values to float type
        image_norm = image_rgb.astype(np.float32)
        # min-max normalization: this image's smallest value maps to -1 and
        # its largest to 1
        # NOTE(review): the scaling depends on each image's own value range;
        # confirm this matches the preprocessing the model was trained with.
        cv2.normalize(image_norm, image_norm,
                      alpha=-1, beta=1, norm_type=cv2.NORM_MINMAX)
        # adding the batch dimension (1, 240, 320, 3)
        return image_norm[None, ...]

    def inference(self, img):
        """Detect faces in a BGR image.

        Args:
            img: image array as returned by ``cv2.imread``.

        Returns:
            ``(boxes, scores)``: ``boxes`` has one ``[x1, y1, x2, y2]`` row
            per kept detection, scaled to the pixel size of ``img``;
            ``scores`` holds the matching confidence for each row.
        """
        # BGR image to tensor
        input_tensor = self._pre_processing(img)

        # get input details, set tensor and invoke
        input_details = self._interpreter.get_input_details()
        self._interpreter.set_tensor(input_details[0]["index"], input_tensor)
        self._interpreter.invoke()

        # get results (drop the batch dimension)
        output_details = self._interpreter.get_output_details()
        boxes = self._interpreter.get_tensor(output_details[0]["index"])[0]
        scores = self._interpreter.get_tensor(output_details[1]["index"])[0]

        # decode boxes to corner format, filter and suppress overlaps
        boxes, scores = self._post_processing(boxes, scores)
        # scaling the normalized boxes to pixels: shape[1::-1] is
        # (width, height), tiled to (w, h, w, h)
        boxes *= np.tile(img.shape[1::-1], 2)
        return boxes, scores

    def _post_processing(self, boxes, scores):
        """Decode raw outputs, then apply confidence filtering and NMS.

        Returns ``(boxes, scores)`` with one score per returned box.
        """
        # bounding box regression
        boxes = self._decode_regression(boxes)
        # confidence for every anchor (column 1 of the score output)
        scores = scores[:, 1]

        # keep only anchors whose confidence exceeds the threshold
        conf_mask = scores > self._conf_threshold
        boxes, scores = boxes[conf_mask], scores[conf_mask]

        # nothing survived the confidence filter: no need to run NMS
        if boxes.shape[0] == 0:
            return boxes, scores

        # non-maximum suppression
        # prunes away boxes that have high intersection-over-union (IOU)
        # overlap with previously selected boxes
        nms_mask = tf.image.non_max_suppression(
            boxes, scores, max_output_size=self._max_detections,
            iou_threshold=self._iou_threshold,
            score_threshold=float('-inf'), name=None
        )
        # nms_mask contains the indexes of the selected boxes; apply it to
        # the scores as well so both outputs stay aligned row for row
        keep = np.asarray(nms_mask)
        boxes = np.take(boxes, keep, axis=0)
        scores = np.take(scores, keep, axis=0)
        return boxes, scores

    def _load_anchors(self):
        """Return ``(anchors_wh, anchors_xy)``, reading the files only once."""
        if self._anchors is None:
            # reading the predefined anchors
            with open('./anchors_wh.npy', 'rb') as f:
                anchors_wh = np.load(f)
            with open('./anchors_xy.npy', 'rb') as f:
                anchors_xy = np.load(f)
            self._anchors = (anchors_wh, anchors_xy)
        return self._anchors

    def _decode_regression(self, reg):
        """Convert per-anchor regression values into corner-format boxes.

        Args:
            reg: array with four columns per anchor; the first two are
                center offsets and the last two are log-scale sizes.

        Returns:
            Array of ``[start_x, start_y, end_x, end_y]`` rows clipped to
            the range [0, 1].
        """
        # bounding box regression
        center_variance = 0.1
        size_variance = 0.2

        anchors_wh, anchors_xy = self._load_anchors()

        # box center: anchor center shifted by the scaled offset
        center_xy = reg[:, :2] * center_variance * anchors_wh + anchors_xy
        # half of the box size: anchor size scaled exponentially, halved
        half_wh = np.exp(reg[:, 2:] * size_variance) * anchors_wh / 2

        # center to corner for every anchor
        start_xy = center_xy - half_wh
        end_xy = center_xy + half_wh
        # concatenation of box coordinates
        boxes = np.concatenate((start_xy, end_xy), axis=-1)
        # clip values of boxes between min 0 max 1
        return np.clip(boxes, 0.0, 1.0)


In [7]:
def image_inference(img_path, model_path, color=(125, 255, 0),
                    output_dir="./data/output/"):
    """Run face detection on one image and save an annotated copy.

    Args:
        img_path: path of the image to read.
        model_path: path of the ``.tflite`` model handed to
            ``UltraLightFaceDetecion``.
        color: color tuple passed to ``cv2.rectangle`` for the boxes.
        output_dir: directory that receives the annotated image; it is
            created when missing. The file keeps the input's base name.

    Raises:
        FileNotFoundError: if ``cv2.imread`` cannot load ``img_path``.
        OSError: if ``cv2.imwrite`` reports that the image was not written.
    """
    # read image first so a bad path fails before the model is loaded;
    # cv2.imread signals failure by returning None instead of raising
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")

    fd = UltraLightFaceDetecion(model_path)
    # make inference
    boxes, scores = fd.inference(img)
    # plot the box using the returned coordinates (plain Python ints)
    for x1, y1, x2, y2 in boxes.astype(int).tolist():
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)

    # saving the image; cv2.imwrite does not create missing directories
    os.makedirs(output_dir, exist_ok=True)
    out_path = os.path.join(output_dir, os.path.basename(img_path))
    if not cv2.imwrite(out_path, img):
        raise OSError(f"Could not write image: {out_path}")

In [9]:

# Model variant to load: 'RFB' gives higher precision, 'slim' runs faster.
mode = 'RFB'
# Pretrained model file that corresponds to the chosen variant.
filepath = f"pretrained/version-{mode}-320_without_postprocessing.tflite"
# Input picture on which the detected faces will be drawn.
img_path = './data/input/2022-01-01_268.png'
image_inference(img_path=img_path, model_path=filepath)

