Merge pull request #94 from luxonis/nn_on_left_right
Run NN on left/right mono cameras; add support for second stage NN
SzabolcsGergely committed Jul 20, 2020
2 parents a596142 + 84aa59c commit 99a60b3
Showing 13 changed files with 203 additions and 82 deletions.
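The flags this PR introduces compose with the existing CLI; a quick sketch of typical invocations (the model names are illustrative, not mandated by this commit):

    # run the default detector on both mono cameras (uses 2 NN engines)
    python3 depthai.py -cnn mobilenet-ssd -cam left_right

    # two-stage inference: a detector plus a second-stage network chained after it
    python3 depthai.py -cnn face-detection-retail-0004 -cnn2 emotions-recognition-retail-0003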
5 changes: 2 additions & 3 deletions calibrate.py
@@ -155,10 +155,9 @@ def __init__(self):
{
'mono':
{
- # 1280x720, 640x400 (binning enabled)
- # only 720/30 fps supported for now
+ # 1280x720, 1280x800, 640x400 (binning enabled)
  'resolution_h': 720,
- 'fps': 30,
+ 'fps': 30.0,
},
},
}
Binary file modified depthai.cmd
Binary file modified depthai.cpython-36m-x86_64-linux-gnu.so
Binary file modified depthai.cpython-37m-arm-linux-gnueabihf.so
144 changes: 109 additions & 35 deletions depthai.py
@@ -6,7 +6,7 @@
import os
import subprocess
from time import time, sleep, monotonic
-
+ from datetime import datetime
import cv2
import numpy as np
import depthai
@@ -17,7 +17,7 @@

from depthai_helpers.object_tracker_handler import show_tracklets

- global args
+ global args, cnn_model2
try:
args = vars(parse_args())
except:
@@ -83,6 +83,22 @@
suffix="_depth"
blob_file_config = cnn_model_path + suffix + ".json"

+ blob_file2 = ""
+ blob_file_config2 = ""
+ cnn_model2 = None
+ if args['cnn_model2']:
+     print("Using CNN2:", args['cnn_model2'])
+     cnn_model2 = args['cnn_model2']
+     cnn_model_path = consts.resource_paths.nn_resource_path + args['cnn_model2'] + "/" + args['cnn_model2']
+     blob_file2 = cnn_model_path + ".blob"
+     blob_file_config2 = cnn_model_path + ".json"
+     if not Path(blob_file2).exists():
+         cli_print("\nWARNING: NN2 blob not found in: " + blob_file2, PrintColors.WARNING)
+         os._exit(1)
+     if not Path(blob_file_config2).exists():
+         cli_print("\nWARNING: NN2 json not found in: " + blob_file_config2, PrintColors.WARNING)
+         os._exit(1)

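For reference, the second-stage model is resolved the same way as the first: a .blob and a .json living in a directory named after the model. With -cnn2 emotions-recognition-retail-0003 (an illustrative model name) the paths above become:

    <nn_resource_path>/emotions-recognition-retail-0003/emotions-recognition-retail-0003.blob
    <nn_resource_path>/emotions-recognition-retail-0003/emotions-recognition-retail-0003.json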
blob_file_path = Path(blob_file)
blob_file_config_path = Path(blob_file_config)
if not blob_file_path.exists():
@@ -116,6 +132,13 @@
"Disconnect/connect usb cable on host! \n", PrintColors.RED)
os._exit(1)

+ if args['cnn_camera'] == 'left_right':
+     if args['NN_engines'] is None:
+         args['NN_engines'] = 2
+         args['shaves'] = 6 if args['shaves'] is None else args['shaves'] - args['shaves'] % 2
+         args['cmx_slices'] = 6 if args['cmx_slices'] is None else args['cmx_slices'] - args['cmx_slices'] % 2
+         compile_model = True
+         cli_print('Running NN on both cams requires 2 NN engines!', PrintColors.RED)

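The rounding above keeps the SHAVE and CMX budgets evenly divisible between the two NN engines; a minimal sketch of the arithmetic, assuming the user passed -sh 7 -cmx 7:

    shaves = 7 - 7 % 2      # -> 6, i.e. 3 SHAVEs per NN engine
    cmx_slices = 7 - 7 % 2  # -> 6, i.e. 3 CMX slices per NN engine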
default_blob=True
if compile_model:
@@ -124,8 +147,6 @@
cmx_slices = args['cmx_slices']
NCE_nr = args['NN_engines']

- outblob_file = blob_file + ".sh" + str(shave_nr) + "cmx" + str(cmx_slices) + "NCE" + str(NCE_nr)
-
if NCE_nr == 2:
if shave_nr % 2 == 1 or cmx_slices % 2 == 1:
cli_print("shave_nr and cmx_slices config must be even number when NCE is 2!", PrintColors.RED)
@@ -135,7 +156,9 @@
else:
shave_nr_opt = int(shave_nr)
cmx_slices_opt = int(cmx_slices)

+
+ outblob_file = blob_file + ".sh" + str(shave_nr) + "cmx" + str(cmx_slices) + "NCE" + str(NCE_nr)

if(not Path(outblob_file).exists()):
cli_print("Compiling model for {0} shaves, {1} cmx_slices and {2} NN_engines ".format(str(shave_nr), str(cmx_slices), str(NCE_nr)), PrintColors.RED)
ret = depthai.download_blob(args['cnn_model'], shave_nr_opt, cmx_slices_opt, NCE_nr, outblob_file)
@@ -150,6 +173,22 @@
cli_print("Compiled mode found: compiled for {0} shaves, {1} cmx_slices and {2} NN_engines ".format(str(shave_nr), str(cmx_slices), str(NCE_nr)), PrintColors.GREEN)
blob_file = outblob_file

+ if args['cnn_model2']:
+     outblob_file = blob_file2 + ".sh" + str(shave_nr) + "cmx" + str(cmx_slices) + "NCE" + str(NCE_nr)
+     if(not Path(outblob_file).exists()):
+         cli_print("Compiling model2 for {0} shaves, {1} cmx_slices and {2} NN_engines ".format(str(shave_nr), str(cmx_slices), str(NCE_nr)), PrintColors.RED)
+         ret = depthai.download_blob(args['cnn_model2'], shave_nr_opt, cmx_slices_opt, NCE_nr, outblob_file)
+         # ret = subprocess.call(['model_compiler/download_and_compile.sh', args['cnn_model'], shave_nr_opt, cmx_slices_opt, NCE_nr])
+         print(str(ret))
+         if(ret != 0):
+             cli_print("Model compile failed. Falling back to default.", PrintColors.WARNING)
+             default_blob=True
+         else:
+             blob_file2 = outblob_file
+     else:
+         cli_print("Compiled mode found: compiled for {0} shaves, {1} cmx_slices and {2} NN_engines ".format(str(shave_nr), str(cmx_slices), str(NCE_nr)), PrintColors.GREEN)
+         blob_file2 = outblob_file

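As with the first stage, the compiled second-stage blob is cached under a name that encodes the compile options, so later runs with the same settings skip recompilation; for example (hypothetical values):

    emotions-recognition-retail-0003.blob.sh6cmx6NCE2  # 6 shaves, 6 CMX slices, 2 NN engines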
if default_blob:
#default
shave_nr = 7
@@ -169,14 +208,17 @@
'calibration_file': consts.resource_paths.calib_fpath,
'padding_factor': 0.3,
'depth_limit_m': 10.0, # In meters, for filtering purpose during x,y,z calc
- 'confidence_threshold' : 0.5, #Depth is calculated for bounding boxes with confidence higher than this number
+ 'confidence_threshold' : 0.5, #Depth is calculated for bounding boxes with confidence higher than this number
},
'ai':
{
'blob_file': blob_file,
'blob_file_config': blob_file_config,
+ 'blob_file2': blob_file2,
+ 'blob_file_config2': blob_file_config2,
'calc_dist_to_bb': calc_dist_to_bb,
'keep_aspect_ratio': not args['full_fov_nn'],
+ 'camera_input': args['cnn_camera'],
'shaves' : shave_nr,
'cmx_slices' : cmx_slices,
'NN_engines' : NCE_nr,
@@ -209,8 +251,7 @@
},
'mono':
{
- # 1280x720, 640x400 (binning enabled)
- # only 720/30 fps supported for now
+ # 1280x720, 1280x800, 640x400 (binning enabled)
'resolution_h': args['mono_resolution'],
'fps': args['mono_fps'],
},
@@ -285,13 +326,25 @@
t_start = time()
frame_count = {}
frame_count_prev = {}
- for s in stream_names:
-     frame_count[s] = 0
-     frame_count_prev[s] = 0

nnet_prev = {}
- nnet_prev["entries_prev"] = []
- nnet_prev["nnet_source"] = []
+ nnet_prev["entries_prev"] = {}
+ nnet_prev["nnet_source"] = {}
+ frame_count['nn'] = {}
+ frame_count_prev['nn'] = {}
+ for s in stream_names:
+     stream_windows = []
+     if s == 'previewout':
+         for cam in {'rgb', 'left', 'right'}:
+             nnet_prev["entries_prev"][cam] = []
+             nnet_prev["nnet_source"][cam] = []
+             frame_count['nn'][cam] = 0
+             frame_count_prev['nn'][cam] = 0
+             stream_windows.append(s + '-' + cam)
+     else:
+         stream_windows.append(s)
+     for w in stream_windows:
+         frame_count[w] = 0
+         frame_count_prev[w] = 0

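After this initialization, NN results and frame counters are keyed per camera instead of being single values; with previewout enabled the state looks roughly like this sketch (exact keys depend on the active streams):

    nnet_prev["entries_prev"]  # {'rgb': [], 'left': [], 'right': []}
    frame_count['nn']          # {'rgb': 0, 'left': 0, 'right': 0}
    frame_count                # {'previewout-rgb': 0, 'previewout-left': 0, ...}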
tracklets = None

@@ -332,60 +385,73 @@ def on_trackbar_change(value):
os._exit(10)

for _, nnet_packet in enumerate(nnet_packets):
- frame_count["metaout"] += 1
-
- nnet_prev["nnet_source"] = nnet_packet
- nnet_prev["entries_prev"] = decode_nn(nnet_packet)
+ camera = nnet_packet.getMetadata().getCameraName()
+ nnet_prev["nnet_source"][camera] = nnet_packet
+ nnet_prev["entries_prev"][camera] = decode_nn(nnet_packet, config=config)
+ frame_count['metaout'] += 1
+ frame_count['nn'][camera] += 1

for packet in data_packets:
+ window_name = packet.stream_name
if packet.stream_name not in stream_names:
continue # skip streams that were automatically added
packetData = packet.getData()
if packetData is None:
print('Invalid packet data!')
continue
elif packet.stream_name == 'previewout':

+ camera = packet.getMetadata().getCameraName()
+ window_name = 'previewout-' + camera
# the format of previewout image is CHW (Channel, Height, Width), but OpenCV needs HWC, so we
# change shape (3, 300, 300) -> (300, 300, 3)
data0 = packetData[0,:,:]
data1 = packetData[1,:,:]
data2 = packetData[2,:,:]
frame = cv2.merge([data0, data1, data2])

- nn_frame = show_nn(nnet_prev["entries_prev"], frame, labels=labels, config=config)
+ nn_frame = show_nn(nnet_prev["entries_prev"][camera], frame, labels=labels, config=config)
if enable_object_tracker and tracklets is not None:
nn_frame = show_tracklets(tracklets, nn_frame, labels)
- cv2.putText(nn_frame, "fps: " + str(frame_count_prev[packet.stream_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
- cv2.imshow('previewout', nn_frame)
+ cv2.putText(nn_frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
+ cv2.putText(nn_frame, "NN fps: " + str(frame_count_prev['nn'][camera]), (2, frame.shape[0]-4), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0))
+ cv2.imshow(window_name, nn_frame)
elif packet.stream_name == 'left' or packet.stream_name == 'right' or packet.stream_name == 'disparity':
frame_bgr = packetData
cv2.putText(frame_bgr, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
- cv2.putText(frame_bgr, "fps: " + str(frame_count_prev[packet.stream_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
+ cv2.putText(frame_bgr, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
if args['draw_bb_depth']:
-     show_nn(nnet_prev["entries_prev"], frame_bgr, labels=labels, config=config, nn2depth=nn2depth)
- cv2.imshow(packet.stream_name, frame_bgr)
+     camera = args['cnn_camera']
+     if packet.stream_name == 'disparity':
+         if camera == 'left_right':
+             camera = 'right'
+     elif camera != 'rgb':
+         camera = packet.getMetadata().getCameraName()
+     show_nn(nnet_prev["entries_prev"][camera], frame_bgr, labels=labels, config=config, nn2depth=nn2depth)
+ cv2.imshow(window_name, frame_bgr)
elif packet.stream_name.startswith('depth'):
frame = packetData

if len(frame.shape) == 2:
if frame.dtype == np.uint8: # grayscale
cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255))
- cv2.putText(frame, "fps: " + str(frame_count_prev[packet.stream_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255))
+ cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255))
else: # uint16
frame = (65535 // frame).astype(np.uint8)
#colorize depth map, comment out code below to obtain grayscale
frame = cv2.applyColorMap(frame, cv2.COLORMAP_HOT)
# frame = cv2.applyColorMap(frame, cv2.COLORMAP_JET)
cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255)
- cv2.putText(frame, "fps: " + str(frame_count_prev[packet.stream_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255)
+ cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255)
else: # bgr
cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255))
- cv2.putText(frame, "fps: " + str(frame_count_prev[packet.stream_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255)
+ cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255)

if args['draw_bb_depth']:
-     show_nn(nnet_prev["entries_prev"], frame, labels=labels, config=config, nn2depth=nn2depth)
- cv2.imshow(packet.stream_name, frame)
+     camera = args['cnn_camera']
+     if camera == 'left_right':
+         camera = 'right'
+     show_nn(nnet_prev["entries_prev"][camera], frame, labels=labels, config=config, nn2depth=nn2depth)
+ cv2.imshow(window_name, frame)

elif packet.stream_name == 'jpegout':
jpg = packetData
@@ -404,21 +470,29 @@ def on_trackbar_change(value):
' CSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['css']),
' MSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['mss']),
' UPA:' + '{:6.2f}'.format(dict_['sensors']['temperature']['upa0']),
- ' DSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['upa1']))
+ ' DSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['upa1']))
elif packet.stream_name == 'object_tracker':
tracklets = packet.getObjectTracker()

- frame_count[packet.stream_name] += 1
+ frame_count[window_name] += 1

t_curr = time()
if t_start + 1.0 < t_curr:
t_start = t_curr
# print("metaout fps: " + str(frame_count_prev["metaout"]))

for s in stream_names:
-     frame_count_prev[s] = frame_count[s]
-     frame_count[s] = 0

+     stream_windows = []
+     if s == 'previewout':
+         for cam in {'rgb', 'left', 'right'}:
+             stream_windows.append(s + '-' + cam)
+             frame_count_prev['nn'][cam] = frame_count['nn'][cam]
+             frame_count['nn'][cam] = 0
+     else:
+         stream_windows.append(s)
+     for w in stream_windows:
+         frame_count_prev[w] = frame_count[w]
+         frame_count[w] = 0

key = cv2.waitKey(1)
if key == ord('c'):
2 changes: 1 addition & 1 deletion depthai_helpers/age_gender_recognition_handler.py
@@ -2,7 +2,7 @@
import numpy as np


- def decode_age_gender_recognition(nnet_packet):
+ def decode_age_gender_recognition(nnet_packet, **kwargs):
detections = []
for _, e in enumerate(nnet_packet.entries()):
if e[1]["female"] > 0.8 or e[1]["male"] > 0.8:
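The decoder signatures gain **kwargs because the main loop now calls decode_nn(nnet_packet, config=config); handlers that ignore the extra keyword keep working unchanged. A minimal sketch of a decoder that does consume it (decode_example and its config lookup are hypothetical, not part of this commit):

    def decode_example(nnet_packet, **kwargs):
        config = kwargs.get('config', {})  # hypothetical: read decoder options from the new keyword
        return [e for _, e in enumerate(nnet_packet.entries())]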
16 changes: 10 additions & 6 deletions depthai_helpers/cli_utils.py
@@ -51,13 +51,13 @@ def parse_args():
parser.add_argument("-nce", "--NN_engines", default=None, type=int,
help="Number of NN_engines used by NN.")
parser.add_argument("-rgbr", "--rgb_resolution", default=1080, type=int,
-     help="RGB cam res config: 1080 or 2160 are supported.")
- parser.add_argument("-rgbf", "--rgb_fps", default=30, type=int,
-     help="RGB cam fps config: 30 fps is supported.")
+     help="RGB cam res height: (1920x)1080, (3840x)2160 or (4056x)3040. Default: %(default)s")
+ parser.add_argument("-rgbf", "--rgb_fps", default=30.0, type=float,
+     help="RGB cam fps: max 118.0 for H:1080, max 42.0 for H:2160. Default: %(default)s")
parser.add_argument("-monor", "--mono_resolution", default=720, type=int,
-     help="Mono cam res config: 720 or 480 are supported.")
- parser.add_argument("-monof", "--mono_fps", default=30, type=int,
-     help="Mono cam fps config: 30 fps is supported.")
+     help="Mono cam res height: (1280x)720, (1280x)800 or (640x)400 - binning. Default: %(default)s")
+ parser.add_argument("-monof", "--mono_fps", default=30.0, type=float,
+     help="Mono cam fps: max 60.0 for H:720 or H:800, max 120.0 for H:400. Default: %(default)s")
parser.add_argument("-dct", "--disparity_confidence_threshold", default=200, type=disparity_ct_type,
help="Disparity_confidence_threshold.")
parser.add_argument("-fv", "--field-of-view", default=None, type=float,
Expand Down Expand Up @@ -85,6 +85,10 @@ def parse_args():
help="Force usb2 connection")
parser.add_argument("-cnn", "--cnn_model", default="mobilenet-ssd", type=str,
help="Cnn model to run on DepthAI")
+ parser.add_argument("-cnn2", "--cnn_model2", default="", type=str,
+     help="Cnn model to run on DepthAI for second-stage inference")
+ parser.add_argument('-cam', "--cnn_camera", default='rgb', choices=['rgb', 'left', 'right', 'left_right'],
+     help='Choose camera input for CNN (default: %(default)s)')
parser.add_argument("-dd", "--disable_depth", default=False, action="store_true",
help="Disable depth calculation on CNN models with bounding box output")
parser.add_argument("-bb", "--draw-bb-depth", default=False, action="store_true",
2 changes: 1 addition & 1 deletion depthai_helpers/emotion_recognition_handler.py
@@ -2,7 +2,7 @@
import numpy as np


- def decode_emotion_recognition(nnet_packet):
+ def decode_emotion_recognition(nnet_packet, **kwargs):
detections = []
for i in range(len(nnet_packet.entries()[0][0])):
detections.append(nnet_packet.entries()[0][0][i])
2 changes: 1 addition & 1 deletion depthai_helpers/landmarks_recognition_handler.py
@@ -2,7 +2,7 @@
import numpy as np


- def decode_landmarks_recognition(nnet_packet):
+ def decode_landmarks_recognition(nnet_packet, **kwargs):
landmarks = []
for i in range(len(nnet_packet.entries()[0][0])):
landmarks.append(nnet_packet.entries()[0][0][i])
