In [None]:
import os

# TF_CPP_MIN_LOG_LEVEL is consumed by TensorFlow's native logging, so it has to
# be present in the environment before `import tensorflow` executes; setting it
# afterwards does not filter the messages emitted while TensorFlow is loading.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# import matplotlib
# import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import common.const as const
from PIL import Image, ImageDraw, ImageFont
import cv2
import time

from object_detection.utils import ops as utils_ops
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import label_map_util

# Silence the Python-side TensorFlow logger as well (errors only).
tf.get_logger().setLevel('ERROR')

# Keypoint edge list handed to the visualisation helper as `keypoint_edges`.
COCO17_HUMAN_POSE_KEYPOINTS = const.COCO17_HUMAN_POSE_KEYPOINTS

# Label map file used to build the class-id -> category lookup that the
# visualisation helper uses for box labels.
PATH_TO_LABELS = './models/research/object_detection/data/mscoco_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS, use_display_name=True)

# The model is addressed by a local directory path built from its display name;
# this path is what gets passed to `hub.load`.
model_display_name = 'EfficientDet_D4_1024x1024'
model_handle = './models/knownese/' + model_display_name

print('Selected model:' + model_display_name)
print('Model Handle at TensorFlow Hub: {}'.format(model_handle))

In [None]:
# ## Loading the selected model
# `hub.load` receives the handle chosen in the configuration cell and returns
# the model object kept in `hub_model`. The wall-clock duration of the whole
# step (including the two status prints) is reported at the end.
start_time = time.time()

print('loading model...')
hub_model = hub.load(model_handle)
print('model loaded!')

end_time = time.time()
elapsed_time = end_time - start_time
print(f'Done! Took {elapsed_time} seconds')

In [None]:
# Shared state used by the capture loops.

# Added to the model's class ids before they are converted to int and looked
# up for drawing.
label_id_offset = 0

# Nothing has been detected yet: the raw model output, its NumPy-converted
# form, and the optional keypoint arrays all start out as None.
results = result = keypoints = keypoint_scores = None

In [None]:
def image_to_numpy_array(img):
    """Turn one H x W x 3 image into a uint8 batch of shape (1, H, W, 3).

    The conversion is done directly with NumPy; the channel order of the input
    is left untouched.

    Args:
        img: array-like image with three channels, e.g. a frame returned by
            ``cv2.VideoCapture.read``.

    Returns:
        A new ``np.uint8`` array of shape ``(1, height, width, 3)``; the input
        is never modified or aliased.

    Raises:
        ValueError: if ``img`` is ``None`` (for instance after a failed camera
            read) or is not a 3-D array whose last axis has length 3.
    """
    if img is None:
        raise ValueError('image_to_numpy_array: got None instead of an image')

    frame = np.asarray(img)
    if frame.ndim != 3 or frame.shape[2] != 3:
        raise ValueError(
            'image_to_numpy_array: expected shape (height, width, 3), got {}'
            .format(frame.shape))

    # print('Width: {}'.format(frame.shape[1]))
    # print('Height: {}'.format(frame.shape[0]))
    # astype() copies by default, so the caller gets an independent buffer.
    return frame[np.newaxis, ...].astype(np.uint8)

In [None]:
# Index of the capture device handed to OpenCV.
CAMERA_INDEX = 0
cap = cv2.VideoCapture(CAMERA_INDEX)

if not cap.isOpened():
    # The capture loop is guarded by cap.isOpened(), so without this message a
    # missing or busy camera would make it do nothing with no explanation.
    print('Could not open camera {}'.format(CAMERA_INDEX))

In [None]:
# Live capture loop with on-demand detection.
#   'a' : run the detector on the current frame and remember its result
#   'r' : forget the remembered result (nothing is drawn any more)
#   'q' : leave the loop
# The remembered result is drawn on every frame until it is replaced or reset.
while cap.isOpened():
    isDetect = False
    start_time = time.time()

    grabbed, frame_bgr = cap.read()
    if not grabbed or frame_bgr is None:
        # cap.read() signals failure through its first return value; stop here
        # rather than handing an empty frame to the conversion below.
        print('Could not read a frame from the camera.')
        break

    image_np = image_to_numpy_array(frame_bgr)
    image_np_with_detections = image_np.copy()

    # Poll the keyboard exactly once per iteration: every cv2.waitKey call
    # consumes the pending key, so polling separately for each hotkey drops
    # presses. This call also lets HighGUI refresh the window shown below.
    key = cv2.waitKey(1) & 0xFF

    if key == ord('a'):
        isDetect = True

        # OpenCV delivers frames in BGR channel order; hand the detector an
        # RGB copy while the BGR frame is kept for display.
        image_rgb = image_to_numpy_array(
            cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
        results = hub_model(image_rgb)

        # different object detection models have additional results
        result = {name: value.numpy() for name, value in results.items()}

        # Use keypoints if available in detections, otherwise make sure no
        # keypoints from an earlier detection are drawn.
        if 'detection_keypoints' in result:
            keypoints = result['detection_keypoints'][0]
            keypoint_scores = result['detection_keypoint_scores'][0]
        else:
            keypoints = None
            keypoint_scores = None
    elif key == ord('r'):
        result = None
        keypoints = None
        keypoint_scores = None

    if result is not None:
        # Draws in place on the display copy of the current frame.
        viz_utils.visualize_boxes_and_labels_on_image_array(
            image_np_with_detections[0],
            result['detection_boxes'][0],
            (result['detection_classes'][0] + label_id_offset).astype(int),
            result['detection_scores'][0],
            category_index,
            use_normalized_coordinates=True,
            max_boxes_to_draw=200,
            min_score_thresh=.30,
            agnostic_mode=False,
            keypoints=keypoints,
            keypoint_scores=keypoint_scores,
            keypoint_edges=COCO17_HUMAN_POSE_KEYPOINTS,
            line_thickness=1)

    # show img
    cv2.imshow('Capturing after detection', image_np_with_detections[0])

    end_time = time.time()
    elapsed_time = end_time - start_time

    # Only iterations that actually ran the detector report their duration.
    if isDetect:
        print('Done! Took {} seconds'.format(elapsed_time))

    if key == ord('q'):
        break

print('Stoped!')

In [None]:
# Free the camera and close the preview window.
cap.release()
cv2.destroyAllWindows()

# Mask-drawing variant of the capture loop.
#   'a' : run the detector on the current frame and remember its result
#   'r' : forget the remembered result
#   'q' : leave the loop
# NOTE(review): `cap` has just been released above, so cap.isOpened() is False
# here and this loop body does not execute unless the capture is re-opened
# first -- confirm whether that is intended.
while cap.isOpened():
    isDetect = False
    start_time = time.time()

    grabbed, frame_bgr = cap.read()
    if not grabbed or frame_bgr is None:
        # cap.read() signals failure through its first return value; stop here
        # rather than handing an empty frame to the conversion below.
        print('Could not read a frame from the camera.')
        break

    image_np = image_to_numpy_array(frame_bgr)
    image_np_with_mask = image_np.copy()

    # Poll the keyboard exactly once per iteration: every cv2.waitKey call
    # consumes the pending key, so polling separately for each hotkey drops
    # presses. This call also lets HighGUI refresh the window shown below.
    key = cv2.waitKey(1) & 0xFF

    if key == ord('a'):
        isDetect = True

        # OpenCV delivers frames in BGR channel order; hand the detector an
        # RGB copy while the BGR frame is kept for display.
        image_rgb = image_to_numpy_array(
            cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
        results = hub_model(image_rgb)

        # different object detection models have additional results
        # all of them are explained in the documentation
        result = {name: value.numpy() for name, value in results.items()}

        if 'detection_masks' in result:
            # we need to convert np.arrays to tensors
            detection_masks = tf.convert_to_tensor(result['detection_masks'][0])
            detection_boxes = tf.convert_to_tensor(result['detection_boxes'][0])

            # Reframe the bbox masks to the image size (axes 1 and 2 of the
            # batched frame are its height and width).
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes,
                image_np.shape[1], image_np.shape[2])
            # Binarise the reframed masks at 0.5.
            detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                               tf.uint8)
            result['detection_masks_reframed'] = detection_masks_reframed.numpy()
    elif key == ord('r'):
        result = None

    if result is not None:
        # Draws in place on the display copy of the current frame; masks are
        # only passed when the detection produced them.
        viz_utils.visualize_boxes_and_labels_on_image_array(
            image_np_with_mask[0],
            result['detection_boxes'][0],
            (result['detection_classes'][0] + label_id_offset).astype(int),
            result['detection_scores'][0],
            category_index,
            use_normalized_coordinates=True,
            max_boxes_to_draw=200,
            min_score_thresh=.30,
            agnostic_mode=False,
            instance_masks=result.get('detection_masks_reframed', None),
            line_thickness=2)

    # show img
    cv2.imshow('Capturing after detection', image_np_with_mask[0])

    end_time = time.time()
    elapsed_time = end_time - start_time

    # Report the duration of iterations that ran the detector, using the flag
    # set above instead of polling the keyboard a second time.
    if isDetect:
        print('Done! Took {} seconds'.format(elapsed_time))

    if key == ord('q'):
        break

print('Stoped!')

cap.release()
cv2.destroyAllWindows()