Age gender refactor #169

Merged
merged 2 commits on Jul 30, 2021
173 changes: 105 additions & 68 deletions gen2-age-gender/main.py
@@ -4,7 +4,7 @@
from pathlib import Path
import blobconverter
import cv2
import depthai
import depthai as dai
import numpy as np
from imutils.video import FPS

@@ -19,107 +19,144 @@

debug = not args.no_debug


def cos_dist(a, b):
return np.dot(a, b)/(np.linalg.norm(a)*np.linalg.norm(b))


def frame_norm(frame, bbox):
normVals = np.full(len(bbox), frame.shape[0])
normVals[::2] = frame.shape[1]
return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

def to_planar(arr: np.ndarray, shape: tuple = None) -> np.ndarray:
if shape is not None: arr = cv2.resize(arr, shape)
return arr.transpose(2, 0, 1).flatten()

def to_planar(arr: np.ndarray, shape: tuple) -> list:
return [val for channel in cv2.resize(arr, shape).transpose(2, 0, 1) for y_col in channel for val in y_col]

def crop_to_square(frame):
height = frame.shape[0]
width = frame.shape[1]
delta = int((width-height) / 2)
return frame[0:height, delta:width-delta]
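
Reviewer note: to_planar converts an interleaved HWC frame into the flat planar (CHW) buffer that NNData.setLayer expects, and crop_to_square trims a widescreen frame to a centered square before that conversion. A quick sanity check of the layout, using an illustrative 300x300 frame that is not part of this PR:

frame = np.zeros((300, 300, 3), dtype=np.uint8)  # HWC, as returned by OpenCV
planar = to_planar(frame)  # flat CHW buffer: 3 * 300 * 300 = 270000 values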

def create_pipeline():
print("Creating pipeline...")
pipeline = depthai.Pipeline()
pipeline = dai.Pipeline()

if args.camera:
# ColorCamera
print("Creating Color Camera...")
cam = pipeline.createColorCamera()
cam.setPreviewSize(300, 300)
cam.setResolution(depthai.ColorCameraProperties.SensorResolution.THE_1080_P)
cam = pipeline.create(dai.node.ColorCamera)
cam.setPreviewSize(1080, 1080)
cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
cam.setInterleaved(False)
cam.setBoardSocket(depthai.CameraBoardSocket.RGB)
cam.setBoardSocket(dai.CameraBoardSocket.RGB)
cam_xout = pipeline.createXLinkOut()
cam_xout.setStreamName("cam_out")
cam.preview.link(cam_xout.input)

# ImageManip that will crop the frame before sending it to the Face detection NN node
face_det_manip = pipeline.create(dai.node.ImageManip)
face_det_manip.initialConfig.setResize(300, 300)
face_det_manip.initialConfig.setFrameType(dai.RawImgFrame.Type.RGB888p)

# NeuralNetwork
print("Creating Face Detection Neural Network...")
detection_nn = pipeline.createMobileNetDetectionNetwork()
detection_nn.setConfidenceThreshold(0.5)
detection_nn.setBlobPath(str(blobconverter.from_zoo(
face_det_nn = pipeline.create(dai.node.MobileNetDetectionNetwork)
face_det_nn.setConfidenceThreshold(0.5)
face_det_nn.setBlobPath(str(blobconverter.from_zoo(
name="face-detection-retail-0004",
shaves=6 if args.camera else 8
)))
detection_nn_xout = pipeline.createXLinkOut()
detection_nn_xout.setStreamName("detection_nn")
detection_nn.out.link(detection_nn_xout.input)
# Link Face ImageManip -> Face detection NN node
face_det_manip.out.link(face_det_nn.input)

# Send face detections to the host (for bounding boxes)
face_det_xout = pipeline.create(dai.node.XLinkOut)
face_det_xout.setStreamName("face_det_out")
face_det_nn.out.link(face_det_xout.input)

# Script node will take the output from the face detection NN as an input and set ImageManipConfig
# to the 'age_gender_manip' to crop the initial frame
image_manip_script = pipeline.create(dai.node.Script)
image_manip_script.inputs['face_det_in'].setBlocking(False)
image_manip_script.inputs['face_det_in'].setQueueSize(4)
face_det_nn.out.link(image_manip_script.inputs['face_det_in'])
image_manip_script.setScript("""
while True:
face_dets = node.io['face_det_in'].get().detections
# node.warn(f"Faces detected: {len(face_dets)}")
for det in face_dets:
cfg = ImageManipConfig()
cfg.setCropRect(det.xmin, det.ymin, det.xmax, det.ymax)
cfg.setResize(62, 62)
cfg.setKeepAspectRatio(False)
node.io['to_manip'].send(cfg)
""")

age_gender_manip = pipeline.create(dai.node.ImageManip)
age_gender_manip.initialConfig.setResize(62, 62)
age_gender_manip.setWaitForConfigInput(False)
image_manip_script.outputs['to_manip'].link(age_gender_manip.inputConfig)

if args.camera:
cam.preview.link(detection_nn.input)
# Use 1080x1080 full image for both NNs
cam.preview.link(face_det_manip.inputImage)
cam.preview.link(age_gender_manip.inputImage)
else:
detection_in = pipeline.createXLinkIn()
detection_in = pipeline.create(dai.node.XLinkIn)
detection_in.setStreamName("detection_in")
detection_in.out.link(detection_nn.input)

# NeuralNetwork
detection_in.out.link(face_det_manip.inputImage)
detection_in.out.link(age_gender_manip.inputImage)

face_cropped_xout = pipeline.create(dai.node.XLinkOut)
face_cropped_xout.setStreamName("face_cropped")
age_gender_manip.out.link(face_cropped_xout.input)

# Age/Gender second stage NN
print("Creating Age Gender Neural Network...")
age_gender_in = pipeline.createXLinkIn()
age_gender_in.setStreamName("age_gender_in")
age_gender_nn = pipeline.createNeuralNetwork()
age_gender_nn = pipeline.create(dai.node.NeuralNetwork)
age_gender_nn.setBlobPath(str(blobconverter.from_zoo(
name="age-gender-recognition-retail-0013",
shaves=6 if args.camera else 8
)))
age_gender_nn_xout = pipeline.createXLinkOut()
age_gender_nn_xout.setStreamName("age_gender_nn")
age_gender_in.out.link(age_gender_nn.input)
age_gender_manip.out.link(age_gender_nn.input)

age_gender_nn_xout = pipeline.create(dai.node.XLinkOut)
age_gender_nn_xout.setStreamName("age_gender_out")
age_gender_nn.out.link(age_gender_nn_xout.input)

print("Pipeline created.")
return pipeline

with dai.Device() as device:
device.setLogLevel(dai.LogLevel.WARN)
device.setLogOutputLevel(dai.LogLevel.WARN)

with depthai.Device() as device:
print("Starting pipeline...")
device.startPipeline(create_pipeline())
if args.camera:
cam_out = device.getOutputQueue("cam_out", 1, True)
cam_out = device.getOutputQueue("cam_out", 4, False)
else:
detection_in = device.getInputQueue("detection_in")
detection_nn = device.getOutputQueue("detection_nn")
age_gender_in = device.getInputQueue("age_gender_in")
age_gender_nn = device.getOutputQueue("age_gender_nn")

face_q = device.getOutputQueue("face_det_out", 4, False)
face_cropped_q = device.getOutputQueue("face_cropped", 4, False)
age_gender_q = device.getOutputQueue("age_gender_out", 4, False)

detections = []
results = []
face_bbox_q = queue.Queue()
next_id = 0

if args.video:
cap = cv2.VideoCapture(str(Path(args.video).resolve().absolute()))

fps = FPS()
fps.start()


def should_run():
return cap.isOpened() if args.video else True


def get_frame():
if args.video:
return cap.read()
else:
return True, np.array(cam_out.get().getData()).reshape((3, 300, 300)).transpose(1, 2, 0).astype(np.uint8)

return True, cam_out.get().getCvFrame()

try:
while should_run():
@@ -133,37 +170,37 @@ def get_frame():
debug_frame = frame.copy()

if not args.camera:
nn_data = depthai.NNData()
nn_data.setLayer("input", to_planar(frame, (300, 300)))
nn_data = dai.NNData()
nn_data.setLayer("input", to_planar(crop_to_square(frame)))
detection_in.send(nn_data)

while detection_nn.has():
detections = detection_nn.get().detections
face_cropped_in = face_cropped_q.tryGet()
if debug and face_cropped_in is not None:
cv2.imshow("cropped", face_cropped_in.getCvFrame())

det_in = face_q.tryGet()
if det_in is not None:
detections = det_in.detections
for detection in detections:
bbox = frame_norm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
det_frame = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]

nn_data = depthai.NNData()
nn_data.setLayer("data", to_planar(det_frame, (48, 96)))
age_gender_in.send(nn_data)
face_bbox_q.put(bbox)

while age_gender_nn.has():
det = age_gender_nn.get()
age = int(float(np.squeeze(np.array(det.getLayerFp16('age_conv3')))) * 100)
gender = np.squeeze(np.array(det.getLayerFp16('prob')))
gender_str = "female" if gender[0] > gender[1] else "male"
bbox = face_bbox_q.get()

while not len(results) < len(detections) and len(results) > 0:
results.pop(0)
results.append({
"bbox": bbox,
"gender": gender_str,
"age": age,
"ts": time.time()
})

# If there is a face detected, there will also be an age/gender
# inference result available soon, so we can wait for it
det = age_gender_q.get()
age = int(float(np.squeeze(np.array(det.getLayerFp16('age_conv3')))) * 100)
gender = np.squeeze(np.array(det.getLayerFp16('prob')))
gender_str = "female" if gender[0] > gender[1] else "male"

while not len(results) < len(detections) and len(results) > 0:
results.pop(0)
results.append({
"bbox": bbox,
"gender": gender_str,
"age": age,
"ts": time.time()
})

# Display results for 0.2 seconds after the inference
results = list(filter(lambda result: time.time() - result["ts"] < 0.2, results))

if debug and frame is not None:
@@ -175,7 +212,7 @@ def get_frame():
cv2.putText(debug_frame, result["gender"], (bbox[0], y + 20), cv2.FONT_HERSHEY_TRIPLEX, 1.0, (255, 255, 255))

aspect_ratio = frame.shape[1] / frame.shape[0]
cv2.imshow("Camera_view", cv2.resize(debug_frame, (int(900), int(900 / aspect_ratio))))
cv2.imshow("Camera_view", debug_frame)
if cv2.waitKey(1) == ord('q'):
cv2.destroyAllWindows()
break
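A note for reviewers on how the new queues pair up: each face detection makes the Script node emit exactly one crop config, so one age/gender result arrives per detection and the host can consume the two output streams in lockstep. A minimal consumer sketch of that contract (camera mode, reusing create_pipeline from this diff; error handling omitted):

import depthai as dai
import numpy as np

with dai.Device() as device:
    device.startPipeline(create_pipeline())
    face_q = device.getOutputQueue("face_det_out", 4, False)
    rec_q = device.getOutputQueue("age_gender_out", 4, False)
    while True:
        for det in face_q.get().detections:
            # exactly one second-stage result is produced per detection, in order
            rec = rec_q.get()
            age = int(np.squeeze(np.array(rec.getLayerFp16("age_conv3"))) * 100)
            prob = np.squeeze(np.array(rec.getLayerFp16("prob")))
            print("age:", age, "gender:", "female" if prob[0] > prob[1] else "male")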
3 changes: 2 additions & 1 deletion gen2-age-gender/requirements.txt
@@ -1,4 +1,5 @@
opencv-python==4.5.1.48
imutils==0.5.4
depthai==2.7.2.0
blobconverter==0.0.12
--extra-index-url https://artifacts.luxonis.com/artifactory/luxonis-python-snapshot-local/
depthai==2.8.0.0.dev0+9b04a4351006cb802b6a7d9ddb2cca01e5be56ac
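
Since the pinned depthai wheel is a development snapshot, pip needs the extra index URL above to resolve it; a typical install from the repository root:

python3 -m pip install -r gen2-age-gender/requirements.txt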