diff --git a/depthai-core b/depthai-core
index 0eaab54a7..eaf90f98b 160000
--- a/depthai-core
+++ b/depthai-core
@@ -1 +1 @@
-Subproject commit 0eaab54a7e3415ec135d315dbd33d931475c0097
+Subproject commit eaf90f98b369785b66a3a3fa6807baf19e0f0fe6
diff --git a/docs/source/_static/images/examples/normalize_model.png b/docs/source/_static/images/examples/normalize_model.png
new file mode 100644
index 000000000..be861497a
Binary files /dev/null and b/docs/source/_static/images/examples/normalize_model.png differ
diff --git a/docs/source/samples/NeuralNetwork/concat_multi_input.rst b/docs/source/samples/NeuralNetwork/concat_multi_input.rst
new file mode 100644
index 000000000..5b5f93952
--- /dev/null
+++ b/docs/source/samples/NeuralNetwork/concat_multi_input.rst
@@ -0,0 +1,39 @@
+Multi-Input Frame Concatenation
+===============================
+
+This example concatenates all 3 inputs with a simple custom model created with PyTorch (`link here `__,
+`tutorial here `__).
+It uses :ref:`NeuralNetwork`'s multiple-input feature and links all 3 camera streams directly to the NeuralNetwork node.
+
+Demo
+####
+
+.. image:: https://user-images.githubusercontent.com/18037362/134209980-09c6e2f9-8a26-45d5-a6ad-c31d9e2816e1.png
+
+Setup
+#####
+
+.. include:: /includes/install_from_pypi.rst
+
+Source code
+###########
+
+.. tabs::
+
+    .. tab:: Python
+
+        Also `available on GitHub `__
+
+        .. literalinclude:: ../../../../examples/NeuralNetwork/concat_multi_input.py
+            :language: python
+            :linenos:
+
+    .. tab:: C++
+
+        Also `available on GitHub `__
+
+        .. literalinclude:: ../../../../depthai-core/examples/NeuralNetwork/concat_multi_input.cpp
+            :language: cpp
+            :linenos:
+
+.. include:: /includes/footer-short.rst
diff --git a/docs/source/samples/NeuralNetwork/normalization_multi_input.rst b/docs/source/samples/NeuralNetwork/normalization_multi_input.rst
new file mode 100644
index 000000000..75007c617
--- /dev/null
+++ b/docs/source/samples/NeuralNetwork/normalization_multi_input.rst
@@ -0,0 +1,46 @@
+Frame Normalization
+===================
+
+This example shows how you can normalize a frame before sending it to another neural network. Many neural network models
+require frames with RGB values (pixels) in the range :code:`-0.5` to :code:`0.5`. :ref:`ColorCamera`'s preview outputs
+values between :code:`0` and :code:`255`. A simple custom model, created with PyTorch (`link here `__, `tutorial here `__),
+allows users to specify the mean and scale factors that will be applied to all frame values (pixels).
+
+.. math::
+
+   output = (input - mean) / scale
+
+.. image:: /_static/images/examples/normalize_model.png
+
+On the host, values are converted back to :code:`0`-:code:`255`, so they can be displayed by OpenCV.
+
+.. note::
+    This is just a demo; for normalization you should use OpenVINO's `model optimizer `__ arguments :code:`--mean_values` and :code:`--scale_values`.
+
+Setup
+#####
+
+.. include:: /includes/install_from_pypi.rst
+
+Source code
+###########
+
+.. tabs::
+
+    .. tab:: Python
+
+        Also `available on GitHub `__
+
+        .. literalinclude:: ../../../../examples/NeuralNetwork/normalization_multi_input.py
+            :language: python
+            :linenos:
+
+    .. tab:: C++
+
+        Also `available on GitHub `__
+
+        .. literalinclude:: ../../../../depthai-core/examples/NeuralNetwork/normalization_multi_input.cpp
+            :language: cpp
+            :linenos:
+
+.. include:: /includes/footer-short.rst
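For reference, the normalization model described above amounts to a few lines of PyTorch implementing output = (input - mean) / scale. Below is a minimal sketch under the assumption that the input layers are named frame, mean, and scale (matching the links in the example pipeline); the class name and export details are illustrative, not the exact script from the linked repository:

import torch

class NormalizationModel(torch.nn.Module):
    # Applies output = (input - mean) / scale to the whole frame;
    # mean and scale broadcast over the (1, 3, 300, 300) frame tensor
    def forward(self, frame, mean, scale):
        return (frame - mean) / scale

# Export to ONNX; input_names must match the names linked on the NeuralNetwork node
frame = torch.ones((1, 3, 300, 300), dtype=torch.float32)
torch.onnx.export(
    NormalizationModel(),
    (frame, torch.tensor([127.5]), torch.tensor([255.0])),
    'normalize_model.onnx',
    input_names=['frame', 'mean', 'scale'],
    output_names=['output'],
)

The exported ONNX model would then be compiled into a .blob; the example's comment notes the blob was compiled with `-ip FP16`, which is why the pipeline calls camRgb.setFp16(True).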
diff --git a/docs/source/tutorials/code_samples.rst b/docs/source/tutorials/code_samples.rst
index 29e82a814..f01f849a4 100644
--- a/docs/source/tutorials/code_samples.rst
+++ b/docs/source/tutorials/code_samples.rst
@@ -16,6 +16,7 @@ Code Samples
    ../samples/mixed/*
    ../samples/MobileNet/*
    ../samples/MonoCamera/*
+   ../samples/NeuralNetwork/*
    ../samples/ObjectTracker/*
    ../samples/Script/*
    ../samples/SpatialDetection/*
@@ -95,6 +96,11 @@ are presented with code.
 - :ref:`Mono Camera Control` - Demonstrates how to control the mono camera (crop, exposure, sensitivity) from the host
 - :ref:`Mono Full Resolution Saver` - Saves mono (720P) images to the host (:code:`.png`)
 
+.. rubric:: NeuralNetwork
+
+- :ref:`Multi-Input Frame Concat ` - Concatenate mono/RGB streams on the device with a custom model
+- :ref:`Frame Normalization` - Normalize the frame on the device with a custom model
+
 .. rubric:: ObjectTracker
 
 - :ref:`Object tracker on video` - Performs object tracking from the video
diff --git a/examples/NeuralNetwork/concat_multi_input.py b/examples/NeuralNetwork/concat_multi_input.py
new file mode 100644
index 000000000..bdc901ca9
--- /dev/null
+++ b/examples/NeuralNetwork/concat_multi_input.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+from pathlib import Path
+import sys
+import numpy as np
+import cv2
+import depthai as dai
+SHAPE = 300
+
+# Get argument first
+nnPath = str((Path(__file__).parent / Path('../models/concat_openvino_2021.4_6shave.blob')).resolve().absolute())
+if len(sys.argv) > 1:
+    nnPath = sys.argv[1]
+
+if not Path(nnPath).exists():
+    import sys
+    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')
+
+p = dai.Pipeline()
+p.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_4)
+
+camRgb = p.createColorCamera()
+camRgb.setPreviewSize(SHAPE, SHAPE)
+camRgb.setInterleaved(False)
+camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
+
+def create_mono(p, socket):
+    mono = p.create(dai.node.MonoCamera)
+    mono.setBoardSocket(socket)
+    mono.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
+
+    # ImageManip for resizing (the concat model expects 300x300 inputs) and for changing the frame type
+    manip = p.create(dai.node.ImageManip)
+    manip.initialConfig.setResize(300, 300)
+    manip.initialConfig.setFrameType(dai.RawImgFrame.Type.BGR888p)
+    mono.out.link(manip.inputImage)
+    return manip.out
+
+# NN that concatenates the 3 input frames
+nn = p.createNeuralNetwork()
+nn.setBlobPath(nnPath)
+nn.setNumInferenceThreads(2)
+
+camRgb.preview.link(nn.inputs['img2'])
+create_mono(p, dai.CameraBoardSocket.LEFT).link(nn.inputs['img1'])
+create_mono(p, dai.CameraBoardSocket.RIGHT).link(nn.inputs['img3'])
+
+# Send the concatenated frame from the NN to the host via XLink
+nn_xout = p.createXLinkOut()
+nn_xout.setStreamName("nn")
+nn.out.link(nn_xout.input)
+
+# Pipeline is defined, now we can connect to the device
+with dai.Device(p) as device:
+    qNn = device.getOutputQueue(name="nn", maxSize=4, blocking=False)
+    shape = (3, SHAPE, SHAPE * 3)
+
+    while True:
+        inNn = np.array(qNn.get().getData())
+        frame = inNn.view(np.float16).reshape(shape).transpose(1, 2, 0).astype(np.uint8).copy()
+
+        cv2.imshow("Concat", frame)
+
+        if cv2.waitKey(1) == ord('q'):
+            break
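The concat model itself can be sketched the same way. The input names img1/img2/img3 below come from the nn.inputs[...] links in the example, and concatenation along the width axis (NCHW dim 3) matches the host-side reshape to (3, SHAPE, SHAPE * 3); everything else is illustrative:

import torch

class ConcatModel(torch.nn.Module):
    # Concatenate the three 300x300 frames along the width axis,
    # producing a single 300x900 output frame
    def forward(self, img1, img2, img3):
        return torch.cat((img1, img2, img3), 3)

X = torch.ones((1, 3, 300, 300), dtype=torch.float32)
torch.onnx.export(
    ConcatModel(),
    (X, X, X),
    'concat_model.onnx',
    input_names=['img1', 'img2', 'img3'],
    output_names=['output'],
)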
diff --git a/examples/NeuralNetwork/normalization_multi_input.py b/examples/NeuralNetwork/normalization_multi_input.py
new file mode 100755
index 000000000..62594c8ef
--- /dev/null
+++ b/examples/NeuralNetwork/normalization_multi_input.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+
+from pathlib import Path
+import sys
+import numpy as np
+import cv2
+import depthai as dai
+SHAPE = 300
+
+# Get argument first
+nnPath = str((Path(__file__).parent / Path('../models/normalize_openvino_2021.4_4shave.blob')).resolve().absolute())
+if len(sys.argv) > 1:
+    nnPath = sys.argv[1]
+
+if not Path(nnPath).exists():
+    import sys
+    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')
+
+p = dai.Pipeline()
+p.setOpenVINOVersion(dai.OpenVINO.VERSION_2021_4)
+
+camRgb = p.createColorCamera()
+# Model expects values in FP16, as we have compiled it with `-ip FP16`
+camRgb.setFp16(True)
+camRgb.setInterleaved(False)
+camRgb.setPreviewSize(SHAPE, SHAPE)
+
+nn = p.createNeuralNetwork()
+nn.setBlobPath(nnPath)
+nn.setNumInferenceThreads(2)
+
+script = p.create(dai.node.Script)
+script.setScript("""
+# Run this script only once. We could also send these values from the host.
+# Model formula:
+# output = (input - mean) / scale
+
+# This configuration will subtract 127.5 from all frame values (pixels)
+# 0.0 .. 255.0 -> -127.5 .. 127.5
+data = NNData(2)
+data.setLayer("mean", [127.5])
+node.io['mean'].send(data)
+
+# This configuration will divide all frame values (pixels) by 255.0
+# -127.5 .. 127.5 -> -0.5 .. 0.5
+data = NNData(2)
+data.setLayer("scale", [255.0])
+node.io['scale'].send(data)
+""")
+
+# Re-use the initial mean/scale values for every inference
+script.outputs['mean'].link(nn.inputs['mean'])
+nn.inputs['mean'].setWaitForMessage(False)
+
+script.outputs['scale'].link(nn.inputs['scale'])
+nn.inputs['scale'].setWaitForMessage(False)
+# Always wait for a new frame before starting inference
+camRgb.preview.link(nn.inputs['frame'])
+
+# Send normalized frame values to the host
+nn_xout = p.createXLinkOut()
+nn_xout.setStreamName("nn")
+nn.out.link(nn_xout.input)
+
+# Pipeline is defined, now we can connect to the device
+with dai.Device(p) as device:
+    qNn = device.getOutputQueue(name="nn", maxSize=4, blocking=False)
+    shape = (3, SHAPE, SHAPE)
+    while True:
+        inNn = np.array(qNn.get().getData())
+        # Get back the frame. It's currently normalized to the -0.5 .. 0.5 range
+        frame = inNn.view(np.float16).reshape(shape).transpose(1, 2, 0)
+        # To get the original (0-255) frame back, we multiply all frame values (pixels) by 255 and then add 127.5
+        frame = (frame * 255.0 + 127.5).astype(np.uint8)
+        # Show the initial frame
+        cv2.imshow("Original frame", frame)
+
+        if cv2.waitKey(1) == ord('q'):
+            break
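As a quick sanity check of the math above, the device-side normalization and the host-side de-normalization are exact inverses of each other; a standalone snippet (not part of this patch) demonstrating the round trip:

import numpy as np

pixels = np.array([0.0, 127.5, 200.0, 255.0], dtype=np.float32)
normalized = (pixels - 127.5) / 255.0   # what the model computes on-device
print(normalized)                       # approx. [-0.5, 0.0, 0.2843, 0.5]
restored = normalized * 255.0 + 127.5   # what the host does before imshow
assert np.allclose(restored, pixels)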
diff --git a/examples/models/concat-model/model.yml b/examples/models/concat-model/model.yml
new file mode 100644
index 000000000..3f26f962c
--- /dev/null
+++ b/examples/models/concat-model/model.yml
@@ -0,0 +1,25 @@
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+description: >-
+  concat-model
+task_type: image_inpainting # Just a random task type, so it's supported by the model downloader
+files:
+  - name: concat_openvino_2021.4_6shave.blob
+    size: 1024
+    sha256: 6ac3023ea8dac9b7501ead0f9b2c2a4495d2791a58b7049de065246455cf87be
+    source: https://artifacts.luxonis.com/artifactory/luxonis-depthai-data-local/network/concat_openvino_2021.4_6shave.blob
+
+framework: dldt
+license: https://raw.githubusercontent.com/openvinotoolkit/open_model_zoo/master/LICENSE
diff --git a/examples/models/normalization-model/model.yml b/examples/models/normalization-model/model.yml
new file mode 100644
index 000000000..ce6c65ed5
--- /dev/null
+++ b/examples/models/normalization-model/model.yml
@@ -0,0 +1,25 @@
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+description: >-
+  normalization-model
+task_type: image_inpainting # Just a random task type, so it's supported by the model downloader
+files:
+  - name: normalize_openvino_2021.4_4shave.blob
+    size: 1216
+    sha256: 9b5dc4b375ed9218c2910284556f8152cbfcc0130e7b7a42d9a9991eae8be23a
+    source: https://artifacts.luxonis.com/artifactory/luxonis-depthai-data-local/network/normalize_openvino_2021.4_4shave.blob
+
+framework: dldt
+license: https://raw.githubusercontent.com/openvinotoolkit/open_model_zoo/master/LICENSE
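The model.yml files above follow the Open Model Zoo downloader format, which verifies each download against the listed size and sha256. If you ever need to check a downloaded blob manually, something along these lines would do it (a sketch; the local path is hypothetical):

import hashlib
from pathlib import Path

blob = Path('examples/models/concat_openvino_2021.4_6shave.blob')  # hypothetical local path
digest = hashlib.sha256(blob.read_bytes()).hexdigest()
expected = '6ac3023ea8dac9b7501ead0f9b2c2a4495d2791a58b7049de065246455cf87be'
print('size:', blob.stat().st_size)  # should match the size field (1024)
print('ok' if digest == expected else 'sha256 mismatch')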