In [4]:
from depthai_sdk import OakCamera,Previews, FPSHandler, ResizeMode
from depthai_sdk.managers import PipelineManager, PreviewManager, BlobManager, NNetManager
import depthai as dai
import cv2
import argparse
from pathlib import Path
import blobconverter
import time

In [5]:
FRAME_SIZE = (640,360)
DET_INPUT_SIZE = (300,300)
# model_name = "face-detection-retail-0004"
model_name = "yolov4_tiny_coco_416x416"
zoo_type = "depthai"
blob_path = None

In [6]:
pipeline = dai.Pipeline()

In [7]:
# for RGB Camera frame
cam = pipeline.createColorCamera()
cam.setPreviewSize(FRAME_SIZE[0],FRAME_SIZE[1])
cam.setInterleaved(False)
cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
cam.setBoardSocket(dai.CameraBoardSocket.RGB)

In [8]:
# Defining Mono Camera sources (Stereo Part for Depth)
monoLeft = pipeline.createMonoCamera()
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)

monoRight = pipeline.createMonoCamera()
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)

In [9]:
stereo = pipeline.createStereoDepth()
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

In [10]:
# using blobconverter to get the blob of the required model
if model_name != None:
    blob_path = blobconverter.from_zoo(
        name=model_name,
        shaves=8,
        zoo_type=zoo_type
    )

Downloading C:\Users\hp\.cache\blobconverter\yolov4_tiny_coco_416x416_openvino_2021.4_8shave.blob...
Done


In [11]:
# defining face detection NN node
face_spac_det_nn = pipeline.createYoloSpatialDetectionNetwork()
face_spac_det_nn.setConfidenceThreshold(0.75)
face_spac_det_nn.setBlobPath(blob_path)



In [12]:
# Defining face detection input config
face_det_manip = pipeline.createImageManip()
face_det_manip.initialConfig.setResize(DET_INPUT_SIZE[0],DET_INPUT_SIZE[1])
face_det_manip.initialConfig.setKeepAspectRatio(False)

<depthai.ImageManipConfig at 0x1e585c1f3b0>

In [13]:
# linking RGB output with ImageManip node, output of image manip node to neural netwrok input and the stereo depth output to the NN node
cam.preview.link(face_det_manip.inputImage)
face_det_manip.out.link(face_spac_det_nn.input)
stereo.depth.link(face_spac_det_nn.inputDepth)

In [14]:
#creating a stream to get the output from the camera
x_preview_out = pipeline.createXLinkOut()
x_preview_out.setStreamName("preview")
cam.preview.link(x_preview_out.input)

In [15]:
# create detection output
#creating a stream to get the output from the neural Network
det_out = pipeline.createXLinkOut()
det_out.setStreamName('det_out')
face_spac_det_nn.out.link(det_out.input)

In [16]:
def display_info(frame, bbox, coordinates, status, status_color,fps):
    #displaying bounding box
    cv2.rectangle(frame, bbox,status_color[status],2)
    #displaying coordinates
    if coordinates is not None:
        coord_x, coord_y, coord_z = coordinates
        cv2.putText(frame, f"X: {int(coord_x)} mm", (bbox[0]+10,bbox[1]+20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
        cv2.putText(frame, f"Y: {int(coord_y)} mm", (bbox[0]+10,bbox[1]+20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
        cv2.putText(frame, f"Z: {int(coord_z)} mm", (bbox[0]+10,bbox[1]+20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
    #create bg for showing details
    cv2.rectangle(frame,(5,5,175,100),(50,0,0),-1)
    #Display authetication status on the frame
    cv2.putText(frame,status,(20,40),cv2.FONT_HERSHEY_SIMPLEX,0.5,status_color[status])
    #Display instructions on the frame
    cv2.putText(frame,f'FPS: {fps:.2f}',(20,80), cv2.FONT_HERSHEY_SIMPLEX, 0.6,(255,255,255))
    

In [17]:
# SOme variables that will be used in the main loop
# frame count
frame_count = 0
#placeholder fps value
fps = 0
#used to record the time when we process last frames
prev_frame_time = 0

#used to record the time at which we processed current frames
next_frame_time = 0

# set status colors
status_color = {
    'Object Detected':(0,255,0),
    'No Object Detected':(0,0,255)
}



In [18]:
# Main loop
# We start the pipeline and acquire video frames from the “preview” queue and get the NN outputs (detections and bounding box mapping) from the “det_out” queue.

# Once we have the outputs, we display the spacial information and bounding box on the image frame.

with dai.Device(pipeline) as device:
    #output queue will be used to get the right camera frame from the outputs defined above
    q_cam = device.getOutputQueue(name="preview", maxSize=1,blocking=False)

    #output queue will be used to get nn data from the video frames
    q_det = device.getOutputQueue(name="det_out", maxSize=1, blocking=False)

    # # Output queue will be used to get nn data from the video frames
    # q_bbox_depth_mapping = device.getOutputQueue(name="bbox_depth_mapping_out", maxSize=4, blocking=False)

    while True:
        # get right camera frame
        in_cam = q_cam.get()
        frame = in_cam.getCvFrame()

        bbox = None
        coordinates = None

        inDet = q_det.tryGet()

        if inDet != None:
            detections= inDet.detections
            # if face detected
            if len(detections) != 0:
                detection = detections[0]

                #correct bounding box
                xmin = max(0,detection.xmin)
                ymin = max(0,detection.ymin)
                xmax = min(detection.xmax, 1)
                ymax = min(detection.ymax, 1)

                #calculate coordinates
                x = int(xmin*FRAME_SIZE[0])
                y = int(ymin*FRAME_SIZE[1])
                w = int(xmax*FRAME_SIZE[0] - xmin*FRAME_SIZE[0])
                h = int(ymax*FRAME_SIZE[1] - ymin*FRAME_SIZE[1])

                bbox = (x,y,w,h)

                # Get special coordinates
                coord_x = detection.spatialCoordinates.x
                coord_y = detection.spatialCoordinates.y
                coord_z = detection.spatialCoordinates.z
                print(coord_z)
                coordinates = (coord_x,coord_y,coord_z)
        # check if a face was detected in the frame
        if bbox:
            status = 'Object Detected'
        else:
            status = 'No Object Detected'

        #display info on frame
        display_info(frame, bbox, coordinates, status, status_color,fps)

        #Calculate avg fps
        if frame_count % 10==0:
            # Time when we finish processing last 100 frames
            new_frame_time = time.time()

            #Fps will be number of frame processed in one second 
            fps = 1 / ((new_frame_time-prev_frame_time)/10)
            prev_frame_time = new_frame_time

        # Capture the key pressed
        key_pressed = cv2.waitKey(1) & 0xff

        # Stop the program if Esc key was pressed
        if key_pressed == 27:
            break

        # Display the final frame
        cv2.imshow("Face Cam",frame)

        #Increment frame count
        frame_count += 1

cv2.destroyAllWindows()

                

KeyboardInterrupt: 

: 

In [64]:
# parse arguments
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", help="Provide model path for inference",
                    default='yolov4_tiny_coco_416x416', type=str)
parser.add_argument("-c", "--config", help="Provide config path for inference",
                    default='json/yolov4-tiny.json', type=str)
args = parser.parse_args("--model yolov4_tiny_coco_416x416 --config D:\Downloads\Capstone_camera".split())
CONFIG_PATH = args.config



In [65]:
# create blob, NN, and preview managers
if Path(args.model).exists():
    # initialize blob manager with path to the blob
    bm = BlobManager(blobPath=args.model)
else:
    # initialize blob manager with the name of the model otherwise
    bm = BlobManager(zooName=args.model)

nm = NNetManager(nnFamily="YOLO", inputSize=4)
nm.readConfig(CONFIG_PATH)  # this will also parse the correct input size

pm = PipelineManager()
pm.createColorCam(previewSize=nm.inputSize, xout=True)

# create preview manager
fpsHandler = FPSHandler()
pv = PreviewManager(display=[Previews.color.name], scale={"color":0.33}, fpsHandler=fpsHandler)

# create NN with managers
nn = nm.createNN(pipeline=pm.pipeline, nodes=pm.nodes, source=Previews.color.name,
                 blobPath=bm.getBlob(shaves=6, openvinoVersion=pm.pipeline.getOpenVINOVersion(), zooType="depthai"))
pm.addNn(nn)

# initialize pipeline
with dai.Device(pm.pipeline) as device:
    # create outputs
    pv.createQueues(device)
    nm.createQueues(device)

    nnData = []

    while True:

        # parse outputs
        pv.prepareFrames()
        inNn = nm.outputQueue.tryGet()

        if inNn is not None:
            nnData = nm.decode(inNn)
            # count FPS
            fpsHandler.tick("color")

        nm.draw(pv, nnData)
        pv.showFrames()

        if cv2.waitKey(1) == ord('q'):
            break

PermissionError: [Errno 13] Permission denied: 'D:\\Downloads\\Capstone_camera'