In [1]:
import traitlets
from traitlets.config.configurable import SingletonConfigurable

#use opencv to covert the depth image to RGB image for displaying purpose
import cv2
import numpy as np

#multi-threading is used to capture the image in real time performance
import threading
import json

In [2]:
import torch

print(torch.cuda.is_available())

print(torch.cuda.device_count())

print(torch.cuda.current_device())

print(torch.cuda.get_device_name(torch.cuda.current_device()))

True
1
0
NVIDIA GeForce GTX 1050


In [3]:
from facenet_pytorch import MTCNN
from SixDRepNet import SixDRepNet
from SixDRepNet import utils

face_detector = MTCNN(select_largest=True,device='cuda')
pose_detector = SixDRepNet()

def clamp(val,s,b):
    return max(s,min(val,b))

In [4]:
import uuid
import time

class FaceTracker:
    expiry_timer = 5
    max_correlation_distance = 200
    def __init__(self):
        self.tracked_faces = {}

    def update(self,detections):
        temp = {}
        for face in detections:
            id = str(uuid.uuid4())
            center = (int((face["x1"] + face["x2"]) / 2),int((face["y1"] + face["y2"]) / 2))
            last_updated = time.time()
            temp[id] = {"center":center,
                        "last_updated":last_updated,
                        "detection_data":face}
            
        detections = temp.copy()
        del temp


        distance_pairings = {}
        for existing_id in list(self.tracked_faces.keys()):
            existing_box_center = self.tracked_faces[existing_id]["center"]
            for new_id in list(detections.keys()):
                new_center = detections[new_id]["center"]

                dx = existing_box_center[0]-new_center[0]
                dy = existing_box_center[1]-new_center[1]
                distance = (dx**2+dy**2)**0.5
                distance_pairings[(existing_id,new_id)] = distance

        # I have no idea what this does or how it works; but it sorts the distance pairings by the distance
        distance_pairings = {k: v for k, v in sorted(distance_pairings.items(), key=lambda item: item[1])}

        temp = {}

        for pairing in list(distance_pairings.keys()):
            if len(detections) == 0 or len(self.tracked_faces) == 0:
                break

            if distance_pairings[pairing] > self.max_correlation_distance:
                break

            existing_id = pairing[0]
            new_id = pairing[1]
            try:
                self.tracked_faces[existing_id]
                updated_info = detections[new_id]
            except KeyError:
                # this happens when either the new face or the old face has already been paired up.
                continue
            temp[existing_id] = updated_info
            del detections[new_id]
            del self.tracked_faces[existing_id]

        for existing_id in list(self.tracked_faces.keys()):
            # now we cull the ones that havent been updated, if they have expired
            
            if self.tracked_faces[existing_id]["last_updated"] + self.expiry_timer < time.time():
                del self.tracked_faces[existing_id]

        temp.update(self.tracked_faces)
        temp.update(detections)
        self.tracked_faces = temp.copy()
        





In [5]:
import ipywidgets.widgets as widgets
from IPython.display import display
import time

def bgr8_to_jpeg(value):#convert numpy array to jpeg coded data for displaying 
    return bytes(cv2.imencode('.jpg',value)[1])

def resize_with_padding(image,target_size):
    old_size= image.shape[:2]
    print(old_size)
    ratio = float(target_size) / max(old_size)
    new_dim = (int(old_size[1] * ratio),int(old_size[0] * ratio))
    print(new_dim)

    image = cv2.resize(image, new_dim)

    return image


def text_to_html(value):
    style = """
    <style>
    div {
        min-width: 320px;
    }
    p {
        white-space: pre;
        line-height : 14pt;
    }
    </style>
    """
    value = value.replace("\n","<br>")
    return style+"<div><p>{}</p></div>".format(value)

text_widget = widgets.HTML(value="stats readout here")
text_widget2 = widgets.HTML(value="timing readouthere")
image_widget = widgets.Image(format='jpeg',width=640)
side_by_side = widgets.HBox([image_widget,text_widget,text_widget2])
display(side_by_side)

cap = cv2.VideoCapture(0)
face_tracker = FaceTracker()
while True:
    frame_success, frame = cap.read()
    height,width,chan = np.shape(frame)

    detection = face_detector.detect(frame)

    output_dict = {}
    detections = []
    try:
        n = len(detection[0])
        n = min(n,1)
        
        for i in range(n):
            det_dict = {}
            det_dict["confidence"] = float(detection[1][i])
            bbox = detection[0][i]

            det_dict["x1"] = int(bbox[0])
            det_dict["y1"] = int(bbox[1])
            det_dict["x2"] = int(bbox[2])
            det_dict["y2"] = int(bbox[3])

            detections.append(det_dict)
    except TypeError:
        pass
    output_dict["detections"] = detections

    output_frame = np.copy(frame)
    for i,face in enumerate(detections):
        x1 = clamp(face["x1"],0,width)
        y1 = clamp(face["y1"],0,height)
        x2 = clamp(face["x2"],0,width)
        y2 = clamp(face["y2"],0,height)

        output_frame = cv2.rectangle(output_frame,(x1,y1),(x2,y2),(255,127,0),2)

        midpoint = (x1+int((x2-x1)/2),y1+int((y2-y1)/2)) # point in middle of bounding box
        smallest_dim = int(min(x2-x1,y2-y1))
        x_diff = (x2-x1) - smallest_dim
        y_diff = (y2-y1) - smallest_dim

        x1 += int(np.floor(x_diff/2))
        y1 += int(np.floor(y_diff/2))
        cropped_image = frame[y1:y1+smallest_dim,x1:x1+smallest_dim]

        pitch,yaw,roll = pose_detector.predict(cropped_image)
        detections[i]["facing_angles"] = [pitch.item(),yaw.item(),roll.item()]
        pitch = pitch.item() * np.pi / 180
        yaw = yaw.item() * np.pi / 180
        roll = roll.item() * np.pi / 180

        x = np.cos(pitch)*np.sin(-yaw)
        y = np.sin(-pitch)
        z = np.cos(pitch)*np.cos(yaw)
        detections[i]["facing_vec"] = [x, y, z]
        facing_vec = np.array([x, y, z])
        #pose_detector.draw_axis(output_frame,yaw,pitch,roll,midpoint[0],midpoint[1])

        forward_vec = np.array([0, 0, 1])

        ang = np.arccos(np.dot(forward_vec,facing_vec)) * 180/np.pi

        output_frame = cv2.putText(output_frame,"angle from facing: {:.1f}".format(ang),(20,30),cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255),1)
        
        output_frame = cv2.arrowedLine(output_frame,midpoint,(int(midpoint[0]+x*50),int(midpoint[1]+y*50)),(0,0,255),5)
    face_tracker.update(output_dict["detections"])
    text_widget.value = text_to_html(json.dumps(face_tracker.tracked_faces,indent=8))
    image_widget.value = bgr8_to_jpeg(output_frame)

HBox(children=(Image(value=b'', format='jpeg', width='640'), HTML(value='stats readout here'), HTML(value='tim…

  if method is "Min":
  if method is "Min":


KeyboardInterrupt: 