From 7d384f7ec548dd09225b2feac23ef0eb92167fde Mon Sep 17 00:00:00 2001 From: ganler Date: Fri, 20 Nov 2020 03:38:43 +0800 Subject: [PATCH 1/4] feat: enable pifpaf inference support --- cmake/hyperpose.cmake | 5 + ...ator_api_batched_images_pifpaf.example.cpp | 77 ++ include/hyperpose/hyperpose.hpp | 1 + include/hyperpose/operator/parser/pifpaf.hpp | 23 + src/pifpaf.cpp | 161 +++ src/pifpaf_decoder/aiapp.hpp | 116 ++ src/pifpaf_decoder/image_based.hpp | 140 ++ src/pifpaf_decoder/math_helpers.cpp | 94 ++ src/pifpaf_decoder/math_helpers.hpp | 21 + src/pifpaf_decoder/object_detection.hpp | 48 + .../openpifpaf_postprocessor.cpp | 1162 +++++++++++++++++ .../openpifpaf_postprocessor.hpp | 188 +++ 12 files changed, 2036 insertions(+) create mode 100644 examples/operator_api_batched_images_pifpaf.example.cpp create mode 100644 include/hyperpose/operator/parser/pifpaf.hpp create mode 100644 src/pifpaf.cpp create mode 100644 src/pifpaf_decoder/aiapp.hpp create mode 100644 src/pifpaf_decoder/image_based.hpp create mode 100644 src/pifpaf_decoder/math_helpers.cpp create mode 100644 src/pifpaf_decoder/math_helpers.hpp create mode 100644 src/pifpaf_decoder/object_detection.hpp create mode 100644 src/pifpaf_decoder/openpifpaf_postprocessor.cpp create mode 100644 src/pifpaf_decoder/openpifpaf_postprocessor.hpp diff --git a/cmake/hyperpose.cmake b/cmake/hyperpose.cmake index 6ebb96b5..28131965 100644 --- a/cmake/hyperpose.cmake +++ b/cmake/hyperpose.cmake @@ -5,10 +5,15 @@ set(POSE_LIB_NAME hyperpose) INCLUDE(cmake/cuda.cmake) FIND_PACKAGE(OpenCV REQUIRED) +FILE(GLOB PIFPAF_DECODER + src/pifpaf_decoder/*.cpp) + ADD_LIBRARY( ${POSE_LIB_NAME} # SHARED src/logging.cpp src/tensorrt.cpp + src/pifpaf.cpp + ${PIFPAF_DECODER} src/paf.cpp src/data.cpp src/stream.cpp diff --git a/examples/operator_api_batched_images_pifpaf.example.cpp b/examples/operator_api_batched_images_pifpaf.example.cpp new file mode 100644 index 00000000..a187f1bb --- /dev/null +++ 
b/examples/operator_api_batched_images_pifpaf.example.cpp @@ -0,0 +1,77 @@ +#include "utils.hpp" +#include +#include +#include + +// Model flags +DEFINE_string(model_file, "../data/models/openpifpaf-resnet50.onnx", "Path to the model."); + +DEFINE_bool(logging, false, "Print the logging information or not."); +DEFINE_int32(input_height, 640, "Height of input image."); +DEFINE_int32(input_width, 427, "Width of input image."); + +DEFINE_string(input_folder, "../data/media", "Folder of images to inference."); + +int main(int argc, char** argv) +{ + gflags::ParseCommandLineFlags(&argc, &argv, true); + + // * Collect data into batch. + std::vector batch = glob_images(FLAGS_input_folder); + + if (batch.empty()) { + example_log() << "No input images got. Exiting.\n"; + exit(-1); + } + + example_log() << "Batch shape: [" << batch.size() << ", 3, " << FLAGS_input_height << ", " << FLAGS_input_width << "]\n"; + + // * Create TensorRT engine. + namespace hp = hyperpose; + if (FLAGS_logging) + hp::enable_logging(); + + auto engine = [&] { + using namespace hp::dnn; + constexpr std::string_view onnx_suffix = ".onnx"; + constexpr std::string_view uff_suffix = ".uff"; + + if (std::equal(onnx_suffix.crbegin(), onnx_suffix.crend(), FLAGS_model_file.crbegin())) + return tensorrt(onnx{ FLAGS_model_file }, { FLAGS_input_width, FLAGS_input_height }, batch.size()); + + example_log() << "Your model file's suffix is not [.onnx | .uff]. Your model file path: " << FLAGS_model_file; + example_log() << "Trying to be viewed as a serialized TensorRT model."; + + return tensorrt(tensorrt_serialized{ FLAGS_model_file }, { FLAGS_input_width, FLAGS_input_height }, batch.size()); + }(); + + hp::parser::pifpaf parser{}; + + using clk_t = std::chrono::high_resolution_clock; + auto beg = clk_t::now(); + { + // * TensorRT Inference. 
+ auto feature_map_packets = engine.inference(batch); + for (const auto& packet : feature_map_packets) + for (const auto& feature_map : packet) + example_log() << feature_map << std::endl; + + // * Paf. + std::vector> pose_vectors; + pose_vectors.reserve(feature_map_packets.size()); + for (auto&& packet : feature_map_packets) { + pose_vectors.push_back(parser.process(packet[0], packet[1])); + } + + std::cout << batch.size() << " images got processed. FPS = " + << 1000. * batch.size() / std::chrono::duration(clk_t::now() - beg).count() + << '\n'; + + for (size_t i = 0; i < batch.size(); ++i) { + cv::resize(batch[i], batch[i], { FLAGS_input_width, FLAGS_input_height }); + for (auto&& pose : pose_vectors[i]) + hp::draw_human(batch[i], pose); + cv::imwrite("output_" + std::to_string(i) + ".png", batch[i]); + } + } +} \ No newline at end of file diff --git a/include/hyperpose/hyperpose.hpp b/include/hyperpose/hyperpose.hpp index a851c758..b9df8fcb 100644 --- a/include/hyperpose/hyperpose.hpp +++ b/include/hyperpose/hyperpose.hpp @@ -9,6 +9,7 @@ #include "operator/dnn/tensorrt.hpp" #include "operator/parser/paf.hpp" +#include "operator/parser/pifpaf.hpp" #include "operator/parser/proposal_network.hpp" #include "stream/stream.hpp" \ No newline at end of file diff --git a/include/hyperpose/operator/parser/pifpaf.hpp b/include/hyperpose/operator/parser/pifpaf.hpp new file mode 100644 index 00000000..5da7990c --- /dev/null +++ b/include/hyperpose/operator/parser/pifpaf.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include "paf.hpp" +#include "../../utility/data.hpp" + +namespace hyperpose::parser { + +class pifpaf{ +public: + explicit pifpaf() = default; + std::vector process(const feature_map_t& pif, const feature_map_t& paf); + template + std::vector process(C&& feature_map_containers) + { + // 1@pif, 2@paf. 
+ assert(feature_map_containers.size() == 2); + return process(feature_map_containers[0], feature_map_containers[1]); + } +private: + float m_keypoint_thresh = 0.001f; +}; + +} // namespace hyperpose \ No newline at end of file diff --git a/src/pifpaf.cpp b/src/pifpaf.cpp new file mode 100644 index 00000000..ca4b71f6 --- /dev/null +++ b/src/pifpaf.cpp @@ -0,0 +1,161 @@ +#include +#include "pifpaf_decoder/openpifpaf_postprocessor.hpp" + +namespace hyperpose::parser { + +// TODO: Name ORDER! +std::vector pifpaf::process(const feature_map_t& paf, const feature_map_t& pif) { + // Helpful links (Chinese): + // https://zhuanlan.zhihu.com/p/93896207 + // https://zhuanlan.zhihu.com/p/68073113 + // pif: [17, 5, h, w] => KEY POINTS; + // 5: [conf, dx, dy, b, scale] + // Example: array([ 0.00527313, 0.13620843, -0.32253477, 0.3263721 , 0.90980804], dtype=float32) + // heat map: f(x, y) = \sum_ij conf * N(x, y|ij) + // paf: [19, 9, h, w] => LIMBS; + // 9: [conf, [x1, y1, x2, y2], [b1, b2], [s1, s2]] + // Example: [ 0.00712654, -0.54057586, 5.4075847 , 3.0354404 , 3.1246614 , 1.0621283 , -3.5857565 , 2.6072054 , 3.8406293 ], + // TODO: OPTIMIZE THIS. 
+ + lpdnn::aiapp_impl::OpenPifPafPostprocessor pp; + size_t h = pif.shape()[pif.shape().size() - 2]; + size_t w = pif.shape().back(); + std::vector pif_conf, pif_xy, pif_s, paf_conf, paf_xy1, paf_xy2, paf_b1, paf_b2; + + const auto tensor_sharding_to_vector = [](const feature_map_t& tensor, std::vector& vec, size_t dim2) { + size_t d0 = tensor.shape()[0]; + size_t d1 = tensor.shape()[1]; + size_t h = tensor.shape()[2]; + size_t w = tensor.shape()[3]; + for (int i = 0; i < d0; ++i) { + for (int j = 0; j < h; ++j) { + for (int k = 0; k < w; ++k) { + vec.push_back(tensor.view()[ + i * d1 * w * h + + dim2 * h * w + + j * w + + k + ]); + } + } + } + }; + + const auto tensor_sharding_to_offset_vector = [](const feature_map_t& tensor, std::vector& vec, size_t dimx, size_t dimy) { + size_t d0 = tensor.shape()[0]; + size_t d1 = tensor.shape()[1]; + size_t h = tensor.shape()[2]; + size_t w = tensor.shape()[3]; + for (int i = 0; i < d0; ++i) { + // X first & Then Y + for (int j = 0; j < h; ++j) { + for (int k = 0; k < w; ++k) { + vec.push_back(tensor.view()[ + i * d1 * w * h + + dimx * h * w + + j * w + + k + ]); + } + } + + for (int j = 0; j < h; ++j) { + for (int k = 0; k < w; ++k) { + vec.push_back(tensor.view()[ + i * d1 * w * h + + dimy * h * w + + j * w + + k + ]); + } + } + } + }; + + pif_conf.reserve(17 * h * w); + tensor_sharding_to_vector(pif, pif_conf, 0); + + pif_xy.reserve(17 * 2 * h * w); + tensor_sharding_to_offset_vector(pif, pif_xy, 1, 2); + + pif_s.reserve(17 * h * w); + tensor_sharding_to_vector(pif, pif_s, 4); + + // [19, 9, h, w] -> [conf, p1, p2, b1, b2, ...] 
+ paf_conf.reserve(19 * h * w); + tensor_sharding_to_vector(paf, paf_conf, 0); + + paf_xy1.reserve(2 * 19 * h * w); + tensor_sharding_to_offset_vector(paf, paf_xy1, 1, 2); + + paf_xy2.reserve(2 * 19 * h * w); + tensor_sharding_to_offset_vector(paf, paf_xy2, 3, 4); + + paf_b1.reserve(19 * h * w); + tensor_sharding_to_vector(paf, paf_b1, 5); + + paf_b2.reserve(19 * h * w); + tensor_sharding_to_vector(paf, paf_b2, 6); + + // TODO: RECOVER THE INP{W, H}; + auto apires = pp.postprocess_0_8(640, 427, w, h, + pif_conf.data(), pif_xy.data(), pif_s.data(), + paf_conf.data(), paf_xy1.data(), paf_xy2.data(), paf_b1.data(), paf_b2.data()); + +// std::cout << "Check pif[0]\t" << pif.view()[0] << std::endl; + std::vector ret{}; + ret.reserve(apires.items.size()); +// std::cout << apires.items.size() << "...size\n"; + + /* + * + OpenPifPaf COCO Topology: https://miro.medium.com/max/366/0*KFrFQVj3OoGAtt6o.png +HyperPose: Unified Topology + * + */ + + for (auto&& item : apires.items) { + if (item.landmarks.points.empty()) + continue; + human_t man{}; + man.score = item.confidence; + + auto p2p = [this](const auto& src, auto& dst) { + if (src.confidence > 0.) { + dst.score = 1;// src.confidence; FIXME + dst.x = src.position.x / 10000.; + dst.y = src.position.y / 10000.; +// std::cout << dst.x << ' ' << dst.y << '\n'; + dst.has_value = true; + } + }; + + auto& from = item.landmarks.points; + auto& to = man.parts; + // OpenPifPaf -> HyperPose + p2p(from[0], to[0]); + // ! 
to [1] + constexpr std::array from_index = { + 6, 8, 10, 5, 7, 9, + 12, 14, 16, 11, 13, 15, + 2, 1, 4, 3 + }; + + for (size_t i = 0; i < from_index.size(); ++i) { + p2p(from[from_index[i]], to[i+2]); + } + + if (to[2].has_value && to[5].has_value) { + to[1].x = (to[2].x + to[5].x) / 2;; + to[1].y = (to[2].y + to[5].y) / 2;; + to[1].has_value = true; + to[1].score = (to[2].score + to[5].score) / 2; + } + + ret.push_back(man); + } + + return ret; +} + +} // namespace hyperpose \ No newline at end of file diff --git a/src/pifpaf_decoder/aiapp.hpp b/src/pifpaf_decoder/aiapp.hpp new file mode 100644 index 00000000..1beb0774 --- /dev/null +++ b/src/pifpaf_decoder/aiapp.hpp @@ -0,0 +1,116 @@ +/// +/// Ai-app base interface and types +/// +/// \copyright 2018 NVISO SA. All rights reserved. +/// \license This project is released under the XXXXXX License. +/// + +#pragma once + +#include +#include +#include +#include + +namespace lpdnn { +namespace ai_app { + +/// Aiapp Blob +/// This could be improved to allow referring to existing data +/// thus avoding unneeded data-copy, for example by using shared_ptr. +struct Blob { + /// Data dimensions. Mandatory if the blob represents a tensor. + std::vector dim; + + /// Data. Mandatory if the blob represents a tensor. + std::vector data; + + /// Optional raw representation. + std::vector raw; + + /// Optional CBOR representation when data is structured. + std::vector cbor; + + /// Optional additional information + /// (eg, description of internal representation: "NCHW,8bits,dp3"). + std::string info; +}; + +/// AI-App interface +class Aiapp { + public: + virtual ~Aiapp() {} + + /// @return the ai-class id for this aiapp + virtual const char* class_id() const = 0; + + /// @return the implementation id for this aiapp + virtual const char* impl_id() const = 0; + + /// Initialization options + /// \param cfg: configuration string, typically in JSON format. 
+ /// \return: true if success + virtual bool init(const std::string& cfg) = 0; + + /// Set runtime options for the specified component + /// \param opt: runtime options, typically in JSON format. + /// \param name: subcomponent name + /// \return: true if success + virtual bool set_options(const std::string& opt, + const std::string& name = "") = 0; + + /// Introspection methods + /// \{ + + /// \return: names of all direct subcomponents of the specified component + virtual std::vector components( + const std::string& name = "") const = 0; + + /// \return output(s) of the specified component + virtual std::vector output(const std::string& name = "") const = 0; + + /// \return metrics of the specified component and all its subcomponents + virtual std::string metrics(const std::string& name = "") const = 0; + + /// set end-of-execution at the end of the specified component + /// if name is empty any exit-point previously set is removed + virtual bool set_exit_after(const std::string& name = "") = 0; + + /// \} +}; + +/// AiApp standard processing components +/// Each ai-app can contain other sub-components. +/// Each subcomponent can be identified by a pathname, for example: +/// "preprocessing.normalize" +/// "inference.net1.conv23" +struct Component { + /// Standard component names. Their use is not mandatory but + /// allows an ai-app to be supported by existing tools. + static constexpr char const* preprocessing = "preprocessing"; + static constexpr char const* inference = "inference"; + static constexpr char const* postprocessing = "postprocessing"; + + /// Ai-app interface parameters + static constexpr char const* interface = "interface"; + + /// Name separator in a component pathname string. 
+ /// Component names can't contain the separator except possibly for the leafs + static constexpr char separator = '.'; + + /// Concatenate component names in a component pathname + static std::string join(const std::string& path, const std::string& comp) { + return path + separator + comp; + } +}; + +/// AiApp Metrics +struct Metrics { + /// Standard metrics. All timings are in microseconds. + static constexpr char const* init_time = "init_time"; + static constexpr char const* inference_time = "inference_time"; + static constexpr char const* inference_cpu_time = "inference_cpu_time"; +}; + +} // namespace ai_app +} // namespace lpdnn diff --git a/src/pifpaf_decoder/image_based.hpp b/src/pifpaf_decoder/image_based.hpp new file mode 100644 index 00000000..938cedaa --- /dev/null +++ b/src/pifpaf_decoder/image_based.hpp @@ -0,0 +1,140 @@ +/// +/// Ai-app interface and types for image-based ai-apps +/// +/// \copyright 2018 NVISO SA. All rights reserved. +/// \license This project is released under the XXXXXX License. +/// + +#pragma once + +#include "aiapp.hpp" + +namespace lpdnn::ai_app { + +/// 2-dimensional size +struct Dim2d { + int x; + int y; +}; + +/// Rectangle +struct Rect { + Dim2d origin; + Dim2d size; + + [[nodiscard]] bool empty() const { return size.x <= 0 || size.y <= 0; } +}; + +/// Landmarks +struct Landmark { + Dim2d position; + float confidence; /// Negative value if N/A +}; + +struct Landmarks { + /// Landmark specification identifier + std::string type; + /// Landmark points + std::vector points; +}; + +/// Image representation. +/// The data of a RAW image consists of *y scanlines of *x pixels, +/// with each pixel consisting of N interleaved 8-bit components; the first +/// pixel pointed to is top-left-most in the image. There is no padding between +/// image scanlines or between pixels, regardless of format. The number of +/// components N is 3 for RGB images, 4 for RGBA, 1 for grayscale. 
+/// Support for 8bits RGB format is MANDATORY for all image-processing AiApps. +/// An image can be constructed from a std::vector, or a std::string +/// or raw data pointer and size. When passing rvalues vector or strings, the +/// image will take ownership of the data, otherwise will just keep reference. +class Image { + protected: + /// Contains image data if we have ownership of it + std::vector _image_content; + + public: + /// Image format + enum class Format { + raw_grayscale = 1, /// 8bits grayscale + raw_rgb8 = 3, /// 8bits RGB *MANDATORY* + raw_rgba8 = 4, /// 8bits RGBA + + encoded = 256, /// Standard JPEG/BMP/PNG/TIFF format + + custom = 512 /// Custom format. Use attributes field for more details. + }; + + /// Don't take data ownership. + /// img_dim parameter can be omitted in case of encoded images since + /// this information will be extracted from the image content itself. + Image(Format img_format, const std::vector& data, Dim2d img_dim = {}) + : Image(img_format, data.data(), data.size(), img_dim) {} + + /// Take data ownership + Image(Format img_format, std::vector&& data, Dim2d img_dim = {}) + : _image_content(std::move(data)), + format{img_format}, + dim(img_dim), + data{_image_content.data()}, + data_size{_image_content.size()} {} + + /// Don't take data ownership. + Image(Format img_format, const std::string& data, Dim2d img_dim = {}) + : Image(img_format, (uint8_t*)data.c_str(), data.size(), img_dim) {} + + /// Take data ownership + Image(Format img_format, std::string&& data, Dim2d img_dim = {}) + : Image(img_format, + std::vector((uint8_t*)data.c_str(), + (uint8_t*)data.c_str() + data.size()), + img_dim) { + data.clear(); + } + + /// Don't take data ownership + /// img_data_size is mandatory in case of encoded images. 
+ Image(Format img_format, const uint8_t* img_data, size_t img_data_size, + Dim2d img_dim = {}) + : format{img_format}, + dim(img_dim), + data{img_data}, + data_size{img_data_size} {} + + /// Utility factory methods + static Image encoded(const std::vector& data) { + return Image(Format::encoded, data); + } + + /// Image format + Format format; + + /// Image dimensions (for raw images) + Dim2d dim; + + /// Region of interest inside the image (all if empty) + Rect roi{}; + + /// Custom attributes. + /// This is ai-app specific and allows to specify custom data formats. + std::string attributes; + + /// Pointer to image data (no ownership of the data). + const uint8_t* data; + + /// Size of image data. Mandatory for encoded images. + size_t data_size; + + /// Additional optional information about the image. + /// May be required by some aiapps. + Landmarks landmarks; +}; + +/// Abstract image-based AiApp +class Image_based : virtual public Aiapp { + public: + /// @return supported image formats (ordered by preference) + [[nodiscard]] virtual std::vector image_formats() const = 0; +}; + +} // namespace lpdnn diff --git a/src/pifpaf_decoder/math_helpers.cpp b/src/pifpaf_decoder/math_helpers.cpp new file mode 100644 index 00000000..a571780a --- /dev/null +++ b/src/pifpaf_decoder/math_helpers.cpp @@ -0,0 +1,94 @@ + +#include "math_helpers.hpp" +#include + +#ifdef __APPLE__ +#define MATH_HELPERS_ACCELERATE 1 +#else +#define MATH_HELPERS_ACCELERATE 0 +#endif + +#if MATH_HELPERS_ACCELERATE +#include +#else +#include +#endif + +void vfill(float*x, unsigned long n, float v) { +#if MATH_HELPERS_ACCELERATE + vDSP_vfill(&v, x, 1, n); +#else + // Slow version + for (unsigned long i = 0; i < n; ++i) { + x[i] = v; + } +#endif +} + +void vadd(const float *a, const float *b, float *c, unsigned long n) { +#if MATH_HELPERS_ACCELERATE + vDSP_vadd(a, 1, b, 1, c, 1, n); +#else + // Slow version + for (unsigned long i = 0; i < n; ++i) { + c[i] = a[i] + b[i]; + } +#endif +} + +void 
vexp(float *x, unsigned long n) { +#if MATH_HELPERS_ACCELERATE + int n_ = (int)n; + vvexpf(x, x, &n_); +#else + // Slow version + for (unsigned long i = 0; i < n; ++i) { + x[i] = std::exp(x[i]); + } +#endif +} + +void vmul(const float *a, const float *b, float *c, unsigned long n) { +#if MATH_HELPERS_ACCELERATE + vDSP_vmul(a, 1, b, 1, c, 1, n); +#else + // Slow version + for (unsigned long i = 0; i < n; ++i) { + c[i] = a[i] * b[i]; + } +#endif +} + +void vsmul(const float *a, float b, float *c, unsigned long n) { +#if MATH_HELPERS_ACCELERATE + vDSP_vsmul(a, 1, &b, c, 1, n); +#else + // Slow version + for (unsigned long i = 0; i < n; ++i) { + c[i] = a[i] * b; + } +#endif +} + +float vargmax(const float *x, unsigned long n, int* i) { + assert(n > 0); +#if MATH_HELPERS_ACCELERATE + float maxValue = 0.0f; + vDSP_Length maxIndex = 0; + vDSP_maxvi(x, 1, &maxValue, &maxIndex, n); + *i = (int)maxIndex; + return maxValue; +#else + // Slow version + float maxValue = x[0]; + unsigned long maxIndex = 0; + for (unsigned long i = 1; i < n; ++i) { + if (x[i] > maxValue) { + maxValue = x[i]; + maxIndex = i; + } + } + *i = (int)maxIndex; + return maxValue; +#endif +} diff --git a/src/pifpaf_decoder/math_helpers.hpp b/src/pifpaf_decoder/math_helpers.hpp new file mode 100644 index 00000000..d187bc51 --- /dev/null +++ b/src/pifpaf_decoder/math_helpers.hpp @@ -0,0 +1,21 @@ +#pragma once + +// x[i] = v +void vfill(float*x, unsigned long n, float v); + +// c[i] = a[i] + b[i] +void vadd(const float *a, const float *b, float *c, unsigned long n); + +// x[i] = exp(x[i]) +void vexp(float *x, unsigned long n); + +// c[i] = a[i] * b[i] +void vmul(const float *a, const float *b, float *c, unsigned long n); + +// c[i] = a[i] * b +void vsmul(const float *a, float b, float *c, unsigned long n); + +// out = max(x) +// i = argmax(x) +float vargmax(const float *x, unsigned long n, int* i); + diff --git a/src/pifpaf_decoder/object_detection.hpp b/src/pifpaf_decoder/object_detection.hpp new file mode 
100644 index 00000000..7a7bc673 --- /dev/null +++ b/src/pifpaf_decoder/object_detection.hpp @@ -0,0 +1,48 @@ +/// +/// Ai-app interface for object detection +/// +/// \copyright 2018 NVISO SA. All rights reserved. +/// \license This project is released under the XXXXXX License. +/// + +#pragma once + +#include "image_based.hpp" + +namespace lpdnn::ai_app { + +/// Object detection AiApp +class Object_detection : virtual public Image_based { + public: + struct Result { + struct Item { + float confidence{}; + int class_index{}; + Rect bounding_box{}; + Landmarks landmarks; + }; + + bool success{}; + std::vector items; + }; + + /// Set minimum detectable object size + /// @return true if success + virtual bool set_min_size(Dim2d minSize) = 0; + + /// Set maximum detectable object size + /// @return true if success + virtual bool set_max_size(Dim2d maxSize) = 0; + + /// Perform inference. + virtual Result execute(const Image& input) = 0; + + /// @return Names of classes + virtual std::vector classes() = 0; + + /// @return our aiapp class id + [[nodiscard]] const char* class_id() const override { return ai_class_id; } + static constexpr char const* ai_class_id = "com_bonseyes::object_detection"; +}; + +} // namespace lpdnn diff --git a/src/pifpaf_decoder/openpifpaf_postprocessor.cpp b/src/pifpaf_decoder/openpifpaf_postprocessor.cpp new file mode 100644 index 00000000..acf536fd --- /dev/null +++ b/src/pifpaf_decoder/openpifpaf_postprocessor.cpp @@ -0,0 +1,1162 @@ +#include +#include +#include +#include +#include +#include +#include "openpifpaf_postprocessor.hpp" +#include "math_helpers.hpp" + +namespace lpdnn::aiapp_impl { + +const int OpenPifPafPostprocessor::bones[19][2] = { + {16, 14}, {14, 12}, {17, 15}, {15, 13}, {12, 13}, { 6, 12}, { 7, 13}, + { 6, 7}, { 6, 8}, { 7, 9}, { 8, 10}, { 9, 11}, { 2, 3}, { 1, 2}, + { 1, 3}, { 2, 4}, { 3, 5}, { 4, 6}, { 5, 7}, +}; + +constexpr int C = 17; +constexpr float stride = 8.0f; +constexpr float seedThreshold = 0.2f; +constexpr 
float keypointThreshold = 0.001f; +constexpr float instanceThreshold = 0.2f; + +/* + Creates a (2, h, w) tensor where the first part is: + 0, 1, 2, 3, ..., w-1, + 0, 1, 2, 3, ..., w-1, + 0, 1, 2, 3, ..., w-1, + ... + and the second part is: + 0, 0, 0, 0, ..., 0, + 1, 1, 1, 1, ..., 1, + 2, 2, 2, 2, ..., 2, + ... + Used for normaling the PIFs and PAFs. +*/ +static std::vector makeIndexField(int h, int w) { + std::vector indexField(2 * h * w); + float* ptr = indexField.data(); + for (int y = 0; y < h; ++y) { + for (int x = 0; x < w; ++x) { + ptr[ y *w + x] = (float)x; + ptr[(y + h)*w + x] = (float)y; + } + } + return indexField; +} + +static void scalarSquareAddConstant(float* field, + int fieldH, + int fieldW, + const std::vector& x, + const std::vector& y, + const std::vector& width, + const std::vector& v) +{ + // minx_np = np.round(x_np - width_np).astype(np.int) + // minx_np = np.clip(minx_np, 0, field.shape[1] - 1) + std::vector minx(x.size()); + for (size_t i = 0; i < x.size(); ++i) { + minx[i] = std::min(fieldW - 1, std::max(0, (int)std::round(x[i] - width[i]))); + } + + // miny_np = np.round(y_np - width_np).astype(np.int) + // miny_np = np.clip(miny_np, 0, field.shape[0] - 1) + std::vector miny(y.size()); + for (size_t i = 0; i < y.size(); ++i) { + miny[i] = std::min(fieldH - 1, std::max(0, (int)std::round(y[i] - width[i]))); + } + + // maxx_np = np.round(x_np + width_np).astype(np.int) + // maxx_np = np.clip(maxx_np + 1, minx_np + 1, field.shape[1]) + std::vector maxx(x.size()); + for (size_t i = 0; i < x.size(); ++i) { + maxx[i] = std::min(fieldW, std::max(minx[i] + 1, (int)std::round(x[i] + width[i]) + 1)); + } + + // maxy_np = np.round(y_np + width_np).astype(np.int) + // maxy_np = np.clip(maxy_np + 1, miny_np + 1, field.shape[0]) + std::vector maxy(y.size()); + for (size_t i = 0; i < y.size(); ++i) { + maxy[i] = std::min(fieldH, std::max(miny[i] + 1, (int)std::round(y[i] + width[i]) + 1)); + } + + // for i in range(minx.shape[0]): + // for xx in 
range(minx[i], maxx[i]): + // for yy in range(miny[i], maxy[i]): + // field[yy, xx] += v[i] + for (size_t i = 0; i < minx.size(); ++i) { + for (int yy = miny[i]; yy < maxy[i]; ++yy) { + for (int xx = minx[i]; xx < maxx[i]; ++xx) { + field[yy * fieldW + xx] += v[i]; + } + } + } +} + +static void scalarSquareAddGauss(float* field, + int fieldH, + int fieldW, + const std::vector& x, + const std::vector& y, + const std::vector& sigma_, + const std::vector& v, + float truncate = 2.0f) +{ + // sigma_np = np.maximum(1.0, sigma_np) + // width_np = np.maximum(1.0, truncate * sigma_np) + auto sigma = sigma_; + std::vector width(sigma.size()); + for (size_t i = 0; i < sigma.size(); ++i) { + sigma[i] = std::max(1.0f, sigma[i]); + width[i] = std::max(1.0f, truncate * sigma[i]); + } + + // NOTE: The minx, miny, maxx, maxxy code is the same as in scalarSquareAddConstant(). + // Could probably extract that and do it just once. + + // minx_np = np.round(x_np - width_np).astype(np.int) + // minx_np = np.clip(minx_np, 0, field.shape[1] - 1) + std::vector minx(x.size()); + for (size_t i = 0; i < x.size(); ++i) { + minx[i] = std::min(fieldW - 1, std::max(0, (int)std::round(x[i] - width[i]))); + } + + // miny_np = np.round(y_np - width_np).astype(np.int) + // miny_np = np.clip(miny_np, 0, field.shape[0] - 1) + std::vector miny(y.size()); + for (size_t i = 0; i < y.size(); ++i) { + miny[i] = std::min(fieldH - 1, std::max(0, (int)std::round(y[i] - width[i]))); + } + + // maxx_np = np.round(x_np + width_np).astype(np.int) + // maxx_np = np.clip(maxx_np + 1, minx_np + 1, field.shape[1]) + std::vector maxx(x.size()); + for (size_t i = 0; i < x.size(); ++i) { + maxx[i] = std::min(fieldW, std::max(minx[i] + 1, (int)std::round(x[i] + width[i]) + 1)); + } + + // maxy_np = np.round(y_np + width_np).astype(np.int) + // maxy_np = np.clip(maxy_np + 1, miny_np + 1, field.shape[0]) + std::vector maxy(y.size()); + for (size_t i = 0; i < y.size(); ++i) { + maxy[i] = std::min(fieldH, std::max(miny[i] + 
1, (int)std::round(y[i] + width[i]) + 1)); + } + + // for i in range(minx.shape[0]): + // for xx in range(minx[i], maxx[i]): + // deltax = xx - x[i] + // for yy in range(miny[i], maxy[i]): + // deltay = yy - y[i] + // vv = v[i] * np.exp(-0.5 * (deltax**2 + deltay**2) / sigma[i]**2) + // field[yy, xx] += vv + for (size_t i = 0; i < minx.size(); ++i) { + for (int xx = minx[i]; xx < maxx[i]; ++xx) { + float deltax = (float)xx - x[i]; + for (int yy = miny[i]; yy < maxy[i]; ++yy) { + float deltay = (float)yy - y[i]; + float vv = v[i] * std::exp(-0.5f * (deltax*deltax + deltay*deltay) / (sigma[i]*sigma[i])); + field[yy * fieldW + xx] += vv; + } + } + } + + /* + // For debugging + for (int y = 0; y < fieldH; ++y) { + for (int x = 0; x = fieldW) { return; } + if (miny >= fieldH) { return; } + + // field[miny:maxy, minx:maxx] += value + for (auto yy = miny; yy < maxy; ++yy) { + for (auto xx = minx; xx < maxx; ++xx) { + field[yy * fieldW + xx] += value; + } + } +} + +/** + Combines the different PAF outputs into one big (19, 2, 4, h, w) tensor. + + The input tensors have the shape (19, h, w) except for j1/j2Fields, which + are (38, h, w). +*/ +void OpenPifPafPostprocessor::normalizePAF(const float* intensityFields, + const float* j1Fields, + const float* j2Fields, + const float* j1FieldsLogb, + const float* j2FieldsLogb) +{ + float* pafPtr = paf.data(); + + // Strides for the first dimension of the input tensors: + const size_t if_stride_0 = H * W; + const size_t j1f_stride_0 = H * W; + const size_t j1bf_stride_0 = H * W; + const size_t j2f_stride_0 = H * W; + const size_t j2bf_stride_0 = H * W; + + for (int i = 0; i < 19; ++i) { + // Copy the next h*w values from intensityFields. + size_t ifOffset = i * if_stride_0; + size_t outOffset = i * paf_stride_0; + memcpy(pafPtr + outOffset, intensityFields + ifOffset, H * W * sizeof(float)); + + // Copy the next 2 h*w values from j1Fields. 
+ size_t j1fOffset = (i * 2) * j1f_stride_0; + outOffset += paf_stride_2; + memcpy(pafPtr + outOffset, j1Fields + j1fOffset, 2 * H * W * sizeof(float)); + + // Also add the index field to the values from j1Fields. + vadd(indexField.data(), j1Fields + j1fOffset, pafPtr + outOffset, 2 * H * W); + + // Copy the next h*w values from j1FieldsLogb and exponentiate. + size_t j1bfOffset = i * j1bf_stride_0; + outOffset += paf_stride_2 * 2; + memcpy(pafPtr + outOffset, j1FieldsLogb + j1bfOffset, H * W * sizeof(float)); + vexp(pafPtr + outOffset, H * W); + + // Copy the same h*w values from intensityFields again. + outOffset = i * paf_stride_0 + paf_stride_1; + memcpy(pafPtr + outOffset, intensityFields + ifOffset, H * W * sizeof(float)); + + // Copy the next 2 h*w values from j2Fields. + size_t j2fOffset = (i * 2) * j2f_stride_0; + outOffset += paf_stride_2; + memcpy(pafPtr + outOffset, j2Fields + j2fOffset, 2 * H * W * sizeof(float)); + + // Also add the index field to the values from j2Fields. + vadd(indexField.data(), j2Fields + j2fOffset, pafPtr + outOffset, 2 * H * W); + + // Copy the next h*w values from j2FieldsLogb and exponentiate. + size_t j2bfOffset = i * j2bf_stride_0; + outOffset += paf_stride_2 * 2; + memcpy(pafPtr + outOffset, j2FieldsLogb + j2bfOffset, H * W * sizeof(float)); + vexp(pafPtr + outOffset, H * W); + } + + // NOTE: We could do the exponentiation for j1/j2FieldsLogb in the Core ML + // model already. + + /* + // For debugging + for (int y = 0; y < H; ++y) { + printf("%d: ", y); + for (int x = 0; x < W; ++x) { + printf("%f, ", paf[9*paf_stride_0 + 2*paf_stride_1 + 7*paf_stride_2 + y*W + x]); + } + printf("\n"); + } + */ +} + +/** + Combines the different PIF outputs into one big (17, 4, h, w) tensor. + + The input tensors have the shape (17, h, w) except for jointFields, which + is (34, h, w). 
+*/ +void OpenPifPafPostprocessor::normalizePIF(const float* jointIntensityFields, + const float* jointFields, + const float* scaleFields) +{ + float* pifPtr = pif.data(); + + // Strides for the first dimension of the input tensors: + const size_t iif_stride_0 = H * W; + const size_t jf_stride_0 = H * W; + const size_t sf_stride_0 = H * W; + + // The PyTorch code concatenates the following tensors: + // (17, 1, h, w) + // (17, 2, h, w) + // (17, 1, h, w) + // along the 2nd axis into one tensor of shape (17, 4, h, w). But the + // tensors from Core ML have the following shapes: + // (17, h, w) + // (34, h, w) + // (17, h, w) + // Fortunately, (17, 2, ...) has the same memory layout as (34, ...), + // so we can simply do a bunch of memcpy's. + + for (int i = 0; i < 17; ++i) { + // Copy the next h*w values from jointIntensityFields. + size_t jifOffset = i * iif_stride_0; + size_t outOffset = i * pif_stride_0; + memcpy(pifPtr + outOffset, jointIntensityFields + jifOffset, H * W * sizeof(float)); + + // Copy the next 2 h*w values from jointFields. + size_t jfOffset = (i * 2) * jf_stride_0; + outOffset += pif_stride_1; + memcpy(pifPtr + outOffset, jointFields + jfOffset, 2 * H * W * sizeof(float)); + + // Also add the index field to the values from jointFields. + vadd(indexField.data(), jointFields + jfOffset, pifPtr + outOffset, 2 * H * W); + + // Copy the next h*w values from scaleFields. + size_t sfOffset = i * sf_stride_0; + outOffset += pif_stride_1 * 2; + memcpy(pifPtr + outOffset, scaleFields + sfOffset, H * W * sizeof(float)); + } +} + +OpenPifPafPostprocessor::Target_intensity +OpenPifPafPostprocessor::targetIntensities(const std::vector& pif, + float v_th, bool coreOnly) +{ + const float pif_nn = 16.0f; + + const size_t targets_stride_0 = H_hr * W_hr; + const size_t scales_stride_0 = H_hr * W_hr; + const size_t ns_stride_0 = H_hr * W_hr; + + // These tensors need to be emptied out on each frame. 
+ vfill(targetsCoreOnly.data(), targetsCoreOnly.size(), 0.0f); + vfill(targets.data(), targets.size(), 0.0f); + vfill(scales.data(), scales.size(), 0.0f); + vfill(ns.data(), ns.size(), 0.0f); + + std::vector v; + std::vector x; + std::vector y; + std::vector s; + + for (int i = 0; i < C; ++i) { + // Threshold pif[i, ...], which is a (4, h, w) tensor. Copy the values + // that are over the threshold into four vectors: v, x, y, s. Multiply + // x, y, s with the stride. + // + // v, x, y, s = p[:, p[0] > v_th] + // x = x * self.stride + // y = y * self.stride + // s = s * self.stride + v.clear(); + x.clear(); + y.clear(); + s.clear(); + const size_t pifOffset = i * pif_stride_0; + const size_t xOffset = pifOffset + pif_stride_1; + const size_t yOffset = xOffset + pif_stride_1; + const size_t sOffset = yOffset + pif_stride_1; + for (int j = 0; j < H*W; ++j) { + float p = pif[pifOffset + j]; + if (p > v_th) { + v.push_back(p); + x.push_back(pif[xOffset + j] * stride); + y.push_back(pif[yOffset + j] * stride); + s.push_back(pif[sOffset + j] * stride); + } + } + + /* + // For debugging + printf("iteration: %d\n", i); + printf("v:\n"); for (auto n : v) printf("%f, ", n); printf("\n"); + printf("x:\n"); for (auto n : x) printf("%f, ", n); printf("\n"); + printf("y:\n"); for (auto n : y) printf("%f, ", n); printf("\n"); + printf("s:\n"); for (auto n : s) printf("%f, ", n); printf("\n"); + */ + + // Create a high-resolution confidence map for this keypoint. + + // v / pif_nn + std::vector v_over_pif_nn(v.size()); + vsmul(v.data(), 1.0f / pif_nn, v_over_pif_nn.data(), v.size()); + + // The original code computed the "core only" version in a separate step + // but that duplicates a bunch of work, so we do it at the same time. 
+ const auto tco = targetsCoreOnly.data() + i * targets_stride_0; + scalarSquareAddGauss(tco, H_hr, W_hr, x, y, s, v_over_pif_nn, 0.5); + + // s * v + std::vector s_times_v(v.size()); + vmul(s.data(), v.data(), s_times_v.data(), v.size()); + + const auto t = targets.data() + i * targets_stride_0; + const auto scale = scales.data() + i * scales_stride_0; + const auto n = ns.data() + i * ns_stride_0; + scalarSquareAddGauss(t, H_hr, W_hr, x, y, s, v_over_pif_nn); + scalarSquareAddConstant(scale, H_hr, W_hr, x, y, s, s_times_v); + scalarSquareAddConstant(n, H_hr, W_hr, x, y, s, v); + } + + // m = ns > 0 + // scales[m] = scales[m] / ns[m] + for (size_t i = 0; i < scales.size(); ++i) { + const auto d = ns[i]; + if (d > 0) { scales[i] /= d; } + } + + return Target_intensity{ targets, scales, targetsCoreOnly }; +} + +OpenPifPafPostprocessor::Paf_target +OpenPifPafPostprocessor::scorePafTarget(const std::vector& pafvec, + const std::vector& pifhr, + float pifhr_floor, + float score_th) const +{ + std::vector> scored_forward; + std::vector> scored_backward; + + for (int c = 0; c < 19; ++c) { + // The PAF has shape (19, 2, 4, h, w). We're looking at one (2, 4, h, w) + // slice at a time in this loop. 
+ const size_t pafOffset = c * paf_stride_0; + + // scores = np.min(fourds[:, 0], axis=0) + // mask = scores > score_th + // scores = scores[mask] + std::vector scores; + std::vector mask; + for (int i = 0; i < H * W; ++i) { + auto a = pafvec[pafOffset + i]; + auto b = pafvec[pafOffset + paf_stride_1 + i]; + auto score = std::min(a, b); + if (score > score_th) { + scores.push_back(score); + mask.push_back(i); + } + } + + // fourds = fourds[:, :, mask] + const size_t scores_size = scores.size(); + std::vector masked(2 * 4 * scores_size); + for (size_t i = 0; i < mask.size(); ++i) { + const auto m = mask[i]; + masked[i ] = pafvec[pafOffset + m]; + masked[i + scores_size ] = pafvec[pafOffset + paf_stride_2 + m]; + masked[i + scores_size*2] = pafvec[pafOffset + paf_stride_2*2 + m]; + masked[i + scores_size*3] = pafvec[pafOffset + paf_stride_2*3 + m]; + masked[i + scores_size*4] = pafvec[pafOffset + paf_stride_1 + m]; + masked[i + scores_size*5] = pafvec[pafOffset + paf_stride_1 + paf_stride_2 + m]; + masked[i + scores_size*6] = pafvec[pafOffset + paf_stride_1 + paf_stride_2*2 + m]; + masked[i + scores_size*7] = pafvec[pafOffset + paf_stride_1 + paf_stride_2*3 + m]; + } + + std::vector scores_b(scores_size); + if (pifhr_floor < 1.0f) { + // ij_b = np.round(fourds[0, 1:3] * self.stride).astype(np.int) + // ij_b[0] = np.clip(ij_b[0], 0, self._pifhr.shape[2] - 1) + // ij_b[1] = np.clip(ij_b[1], 0, self._pifhr.shape[1] - 1) + std::vector ij_b(2 * scores_size); + for (size_t i = 0; i < scores_size*2; ++i) { + const int v = (int)std::round(masked[scores_size + i] * stride); + ij_b[i] = std::min(std::max(0, v), i < scores_size ? 
W_hr - 1 : H_hr - 1); + } + + // pifhr_b = self._pifhr[j1i, ij_b[1], ij_b[0]] + // scores_b = scores * (pifhr_floor + (1.0 - pifhr_floor) * pifhr_b) + const auto j1i = bones[c][0] - 1; + for (size_t i = 0; i < scores_b.size(); ++i) { + const auto pifhr_b = pifhr[j1i * pifhr_stride_0 + ij_b[scores_size + i] * pifhr_stride_1 + ij_b[i]]; + scores_b[i] = scores[i] * (pifhr_floor + (1.0f - pifhr_floor) * pifhr_b); + } + } else { + scores_b = scores; + } + + // mask_b = scores_b > score_th + std::vector mask_b; + for (int i = 0; i < (int)scores_b.size(); ++i) { + if (scores_b[i] > score_th) { mask_b.push_back(i); } + } + + // scored_backward.append(np.concatenate(( + // np.expand_dims(scores_b[mask_b], 0), + // fourds[1, 1:4][:, mask_b], + // fourds[0, 1:4][:, mask_b], + // ))) + const size_t mask_b_size = mask_b.size(); + std::vector result_b(7 * mask_b_size); + for (size_t i = 0; i < mask_b_size; ++i) { + const auto m = mask_b[i]; + result_b[i ] = scores_b[m]; + result_b[i + mask_b_size ] = masked[scores_size*5 + m]; + result_b[i + mask_b_size*2] = masked[scores_size*6 + m]; + result_b[i + mask_b_size*3] = masked[scores_size*7 + m]; + result_b[i + mask_b_size*4] = masked[scores_size + m]; + result_b[i + mask_b_size*5] = masked[scores_size*2 + m]; + result_b[i + mask_b_size*6] = masked[scores_size*3 + m]; + } + scored_backward.push_back(result_b); + + std::vector scores_f(scores_size); + if (pifhr_floor < 1.0f) { + // ij_f = np.round(fourds[1, 1:3] * self.stride).astype(np.int) + // ij_f[0] = np.clip(ij_f[0], 0, self._pifhr.shape[2] - 1) + // ij_f[1] = np.clip(ij_f[1], 0, self._pifhr.shape[1] - 1) + std::vector ij_f(2 * scores_size); + for (size_t i = 0; i < scores_size*2; ++i) { + const int v = (int)std::round(masked[scores_size*5 + i] * stride); + ij_f[i] = std::min(std::max(0, v), i < scores_size ? 
W_hr - 1 : H_hr - 1); + } + + // pifhr_f = self._pifhr[j2i, ij_f[1], ij_f[0]] + // scores_f = scores * (pifhr_floor + (1.0 - pifhr_floor) * pifhr_f) + const auto j2i = bones[c][1] - 1; + for (size_t i = 0; i < scores_f.size(); ++i) { + const auto pifhr_f = pifhr[j2i * pifhr_stride_0 + ij_f[scores_size + i] * pifhr_stride_1 + ij_f[i]]; + scores_f[i] = scores[i] * (pifhr_floor + (1.0f - pifhr_floor) * pifhr_f); + } + } else { + scores_f = scores; + } + + // mask_f = scores_f > score_th + std::vector mask_f; + for (int i = 0; i < (int)scores_b.size(); ++i) { + if (scores_f[i] > score_th) { mask_f.push_back(i); } + } + + // scored_forward.append(np.concatenate(( + // np.expand_dims(scores_f[mask_f], 0), + // fourds[0, 1:4][:, mask_f], + // fourds[1, 1:4][:, mask_f], + // ))) + const size_t mask_f_size = mask_f.size(); + std::vector result_f(7 * mask_f_size); + for (size_t i = 0; i < mask_f_size; ++i) { + const auto m = mask_f[i]; + result_f[i ] = scores_f[m]; + result_f[i + mask_f_size ] = masked[scores_size + m]; + result_f[i + mask_f_size*2] = masked[scores_size*2 + m]; + result_f[i + mask_f_size*3] = masked[scores_size*3 + m]; + result_f[i + mask_f_size*4] = masked[scores_size*5 + m]; + result_f[i + mask_f_size*5] = masked[scores_size*6 + m]; + result_f[i + mask_f_size*6] = masked[scores_size*7 + m]; + } + scored_forward.push_back(result_f); + + /* + // For debugging + printf("iteration: %d\n", c); + printf("scores:\n"); for (auto n : scores) printf("%f, ", n); printf("\n"); + printf("mask:\n"); for (auto n : mask) printf("%d, ", n); printf("\n"); + printf("masked:\n"); for (auto n : masked) printf("%f, ", n); printf("\n"); + printf("scores_b:\n"); for (auto n : scores_b) printf("%f, ", n); printf("\n"); + printf("scores_f:\n"); for (auto n : scores_f) printf("%f, ", n); printf("\n"); + */ + } + return Paf_target{ scored_forward, scored_backward }; +} + +std::vector +OpenPifPafPostprocessor::pifhrSeeds(const std::vector& pifhrScales, + const std::vector& pifhrCore) 
+{ + std::vector seeds; + + for (int field_i = 0; field_i < 17; ++field_i) { + const size_t pifhrScalesOffset = field_i * pifhr_stride_0; + const size_t pifhrCoreOffset = field_i * pifhr_stride_0; + + // candidates = np.concatenate((index_fields, np.expand_dims(f, 0)), 0) + // mask = f > self.seed_threshold + std::vector mask; + for (int i = 0; i < H_hr * W_hr; ++i) { + const auto value = pifhrCore[pifhrCoreOffset + i]; + if (value > seedThreshold) { mask.push_back(i); } + } + + // candidates = np.moveaxis(candidates[:, mask], 0, -1) + // This is a (count, 3) tensor where count is #elements over threshold. + std::vector masked(mask.size() * 3); + for (size_t i = 0; i < mask.size(); ++i) { + const auto m = mask[i]; + masked[i*3 ] = indexField_hr[m]; + masked[i*3 + 1] = indexField_hr[m + H_hr*W_hr]; + masked[i*3 + 2] = pifhrCore[pifhrCoreOffset + m]; + } + + // occupied = np.zeros(s.shape) + std::vector occupied(H_hr * W_hr, 0.0f); + + std::vector sorted(mask.size()); + std::iota(sorted.begin(), sorted.end(), 0); + std::sort(sorted.begin(), sorted.end(), [masked] (int const& a, int const& b) { + return masked[a*3 + 2] > masked[b*3 + 2]; + }); + + // for c in sorted(candidates, key=lambda c: c[2], reverse=True): + for (auto c : sorted) { + const auto c_0 = masked[c*3]; + const auto c_1 = masked[c*3 + 1]; + const auto c_2 = masked[c*3 + 2]; + + // i, j = int(c[0]), int(c[1]) + const auto i = (int)c_0; + const auto j = (int)c_1; + if (occupied[j*W_hr + i] > 0) { continue; } + + // width = max(4, s[j, i]) + const auto s = pifhrScales[pifhrScalesOffset + j * pifhr_stride_1 + i]; + const auto width = std::max(4.0f, s); + + // scalar_square_add_single(occupied, c[0], c[1], width / 2.0, 1.0) + scalarSquareAddSingle(occupied.data(), H_hr, W_hr, c_0, c_1, width / 2.0f, 1.0f); + + // seeds.append((c[2], field_i, c[0] / self.stride, c[1] / self.stride)) + seeds.emplace_back( c_2, field_i, c_0 / stride, c_1 / stride ); + } + } + + // seeds = list(sorted(seeds, reverse=True)) + 
std::sort(seeds.begin(), seeds.end(), [] (const Pifhr_seed& a, const Pifhr_seed& b) { + const auto ca = std::get<0>(a); + const auto cb = std::get<0>(b); + return ca > cb; + }); + + // if len(seeds) > 500: + // if seeds[500][0] > 0.1: + // seeds = [s for s in seeds if s[0] > 0.1] + // else: + // seeds = seeds[:500] + if (seeds.size() > 500) { + seeds.resize(500); + } + return seeds; +} + +std::vector +OpenPifPafPostprocessor::pafCenter(const std::vector& paf_field, + float x, float y, float sigma) +{ + std::vector mask; + const int paf_stride = (int)paf_field.size() / 7; + for (int i = 0; i < paf_stride; ++i) { + const bool take = (paf_field[ paf_stride + i] > x - sigma * paf_field[3*paf_stride + i]) && + (paf_field[ paf_stride + i] < x + sigma * paf_field[3*paf_stride + i]) && + (paf_field[2*paf_stride + i] > y - sigma * paf_field[3*paf_stride + i]) && + (paf_field[2*paf_stride + i] < y + sigma * paf_field[3*paf_stride + i]); + if (take) { mask.push_back(i); } + } + if (mask.empty()) { return {}; } + + const int mask_size = (int)mask.size(); + const int out_stride = mask_size; + std::vector result(7 * mask_size, 0.0f); + for (int j = 0; j < 7; ++j) { + for (int i = 0; i < mask_size; ++i) { + const int m = mask[i]; + result[j*out_stride + i] = paf_field[j*paf_stride + m]; + } + } + return result; +} + +OpenPifPafPostprocessor::Connection +OpenPifPafPostprocessor::growConnection(float x, float y, + const std::vector& paf_field_) +{ + // # source value + // paf_field = paf_center(paf_field, xy[0], xy[1], sigma=2.0) + // if paf_field.shape[1] == 0: + // return 0, 0, 0 + const auto paf_field = pafCenter(paf_field_, x, y, 2.0f); + if (paf_field.empty()) { return Connection{ 0, 0, 0}; } + + // # source distance + // d = np.linalg.norm(np.expand_dims(xy, 1) - paf_field[1:3], axis=0) + // b_source = paf_field[3] * 3.0 + // # combined value and source distance + // v = paf_field[0] + // scores = np.exp(-1.0 * d / b_source) * v # two-tailed cumulative Laplace + const int 
paf_stride = (int)paf_field.size() / 7; + std::vector scores(paf_stride); + for (int i = 0; i < paf_stride; ++i) { + const auto a = x - paf_field[paf_stride + i]; + const auto b = y - paf_field[paf_stride*2 + i]; + const auto d = std::sqrt(a*a + b*b); + const auto b_source = paf_field[paf_stride*3 + i] * 3.0f; + const auto v = paf_field[i]; + scores[i] = std::exp(-d / b_source) * v; + } + + // return self._target_with_maxscore(paf_field[4:7], scores) + int max_i; + const float score = vargmax(scores.data(), scores.size(), &max_i); + return Connection{ paf_field[paf_stride*4 + max_i], paf_field[paf_stride*5 + max_i], score }; +} + +std::vector OpenPifPafPostprocessor::frontier(Annotation& ann) { + std::vector f; + + for (int connection_i = 0; connection_i < numBones; ++connection_i) { + const auto bone = bones[connection_i]; + const auto j1i = bone[0] - 1; + const auto j2i = bone[1] - 1; + if (ann.keypoints[j1i*3 + 2] > 0.0f && ann.keypoints[j2i*3 + 2] == 0.0f) { + f.emplace_back( ann.keypoints[j1i*3 + 2], connection_i, true, j1i, j2i ); + } + } + + for (int connection_i = 0; connection_i < numBones; ++connection_i) { + const auto bone = bones[connection_i]; + const auto j1i = bone[0] - 1; + const auto j2i = bone[1] - 1; + if (ann.keypoints[j2i*3 + 2] > 0.0f && ann.keypoints[j1i*3 + 2] == 0.0f) { + f.emplace_back( ann.keypoints[j2i*3 + 2], connection_i, false, j1i, j2i ); + } + } + + std::sort(f.begin(), f.end(), [] (const frontier_t& a, const frontier_t& b) { + const auto ca = std::get<0>(a); + const auto cb = std::get<0>(b); + return ca > cb; + }); + + return f; +} + +OpenPifPafPostprocessor::frontier_t OpenPifPafPostprocessor::frontierIter(Annotation& ann) { + while (frontierActive) { + // unblocked_frontier = [f for f in self.frontier() + // if (f[1], f[2]) not in block_frontier] + std::vector unblockedFrontier; + for (auto f : frontier(ann)) { + const auto connection_id = std::get<1>(f); + const auto forward = std::get<2>(f); + if 
(blockFrontier.find(std::tuple{ connection_id, forward }) == blockFrontier.end()) { + unblockedFrontier.push_back(f); + } + } + + /* + // For debugging + printf("unblockedFrontier "); + for (auto n : unblockedFrontier) { + printf("(%f, %d, %s, %d, %d), ", std::get<0>(n), std::get<1>(n), + std::get<2>(n) ? "true" : "false", + std::get<3>(n), std::get<4>(n)); + } + printf("\n"); + */ + + // if not unblocked_frontier: + // break + if (unblockedFrontier.empty()) { + frontierActive = false; + break; + } + + // first = unblocked_frontier[0] + // yield first + // block_frontier.add((first[1], first[2])) + const auto first = unblockedFrontier[0]; + const auto connection_id = std::get<1>(first); + const auto forward = std::get<2>(first); + blockFrontier.insert(std::tuple{ connection_id, forward }); + return first; + } + return {}; +} + +void OpenPifPafPostprocessor::grow(Annotation& ann, + const std::vector>& pafForward, + const std::vector>& pafBackward, + float th) +{ + frontierActive = true; + blockFrontier.clear(); + + while (true) { + const auto f = frontierIter(ann); + if (!frontierActive) { return; } + + const auto i = std::get<1>(f); + const auto forward = std::get<2>(f); + const auto j1i = std::get<3>(f); + const auto j2i = std::get<4>(f); + + // For debugging + //printf("grow: %d %s %d %d\n", i, forward ? 
"true" : "false", j1i, j2i); + + float x, y, v; + std::vector directed_paf_field; + std::vector directed_paf_field_reverse; + if (forward) { + x = ann.keypoints[j1i*3 ]; + y = ann.keypoints[j1i*3 + 1]; + v = ann.keypoints[j1i*3 + 2]; + directed_paf_field = pafForward[i]; + directed_paf_field_reverse = pafBackward[i]; + } else { + x = ann.keypoints[j2i*3 ]; + y = ann.keypoints[j2i*3 + 1]; + v = ann.keypoints[j2i*3 + 2]; + directed_paf_field = pafBackward[i]; + directed_paf_field_reverse = pafForward[i]; + } + + const auto t = growConnection(x, y, directed_paf_field); + const auto new_x = std::get<0>(t); + const auto new_y = std::get<1>(t); + auto new_v = std::get<2>(t); + + if (new_v < th) { continue; } + + // reverse match + if (th >= 0.1) { + const auto t1 = growConnection(new_x, new_y, directed_paf_field_reverse); + const auto reverse_x = std::get<0>(t1); + const auto reverse_y = std::get<1>(t1); + const auto reverse_v = std::get<2>(t1); + if (reverse_v < th) { continue; } + if (std::abs(x - reverse_x) + std::abs(y - reverse_y) > 1.0f) { continue; } + } + + new_v = std::sqrt(new_v * v); // geometric mean + + if (forward) { + if (new_v > ann.keypoints[j2i*3 + 2]) { + ann.keypoints[j2i*3 ] = new_x; + ann.keypoints[j2i*3 + 1] = new_y; + ann.keypoints[j2i*3 + 2] = new_v; + } + } else { + if (new_v > ann.keypoints[j1i*3 + 2]) { + ann.keypoints[j1i*3 ] = new_x; + ann.keypoints[j1i*3 + 1] = new_y; + ann.keypoints[j1i*3 + 2] = new_v; + } + } + } +} + +void OpenPifPafPostprocessor::fillJointScales(Annotation& ann, + const std::vector& scales, + int fieldH, + int fieldW, + float hr_scale) +{ + for (int k = 0; k < numKeypoints; ++k) { + const auto x = ann.keypoints[k*3]; + const auto y = ann.keypoints[k*3 + 1]; + const auto v = ann.keypoints[k*3 + 2]; + if (v == 0) { continue; } + + // i = max(0, min(scale_field.shape[1] - 1, int(round(xyv[0] * hr_scale)))) + // j = max(0, min(scale_field.shape[0] - 1, int(round(xyv[1] * hr_scale)))) + const auto i = std::max(0, 
std::min(fieldW - 1, (int)std::round(x * hr_scale))); + const auto j = std::max(0, std::min(fieldH - 1, (int)std::round(y * hr_scale))); + + // self.joint_scales[xyv_i] = scale_field[j, i] / hr_scale + ann.jointScales[k] = scales[k*pifhr_stride_0 + j*pifhr_stride_1 + i] / hr_scale; + } +} + +std::vector +OpenPifPafPostprocessor::decodeAnnotations(const std::vector& pifhr, + const std::vector& pifhrScales, + const std::vector& pifhrCore, + const std::vector>& pafForward, + const std::vector>& pafBackward) +{ + const auto seeds = pifhrSeeds(pifhrScales, pifhrCore); + + // This is a (17, H_hr, W_hr) tensor. + std::vector occupied(17 * H_hr * W_hr, 0.0f); + + std::vector annotations; + for (auto& seed : seeds) { + const auto v = std::get<0>(seed); + const auto f = std::get<1>(seed); + const auto x = std::get<2>(seed); + const auto y = std::get<3>(seed); + + const auto i = std::min(std::max(0, (int)std::round(x * stride)), W_hr - 1); + const auto j = std::min(std::max(0, (int)std::round(y * stride)), H_hr - 1); + if (occupied[f*H_hr*W_hr + j*W_hr + i] > 0.0f) { continue; } + + Annotation ann(f, x, y, v); + grow(ann, pafForward, pafBackward); + fillJointScales(ann, pifhrScales, H_hr, W_hr, stride); + annotations.push_back(ann); + + for (int i = 0; i < numKeypoints; ++i) { + const auto x = ann.keypoints[i*3]; + const auto y = ann.keypoints[i*3 + 1]; + const auto v = ann.keypoints[i*3 + 2]; + if (v == 0) { continue; } + + const auto width = ann.jointScales[i] * stride; + scalarSquareAddSingle(occupied.data() + i*H_hr*W_hr, H_hr, W_hr, + x * stride, y * stride, width / 2.0f, 1.0f); + } + } + return annotations; +} + +std::vector OpenPifPafPostprocessor::softNMS(std::vector& annotations) { + float maxx = 0.0f; + float maxy = 0.0f; + for (auto& ann : annotations) { + for (int k = 0; k < numKeypoints; ++k) { + auto x = ann.keypoints[k*3]; + auto y = ann.keypoints[k*3 + 1]; + if (x > maxx) { maxx = x; } + if (y > maxy) { maxy = y; } + } + } + + const auto h = (int)(maxy + 1); + 
const auto w = (int)(maxx + 1); + std::vector occupied(17 * h * w, 0.0f); + + std::vector sorted(annotations.size()); + std::iota(sorted.begin(), sorted.end(), 0); + std::sort(sorted.begin(), sorted.end(), [annotations] (int const& a, int const& b) { + return annotations[a].score() > annotations[b].score(); + }); + + for (auto a : sorted) { + Annotation& ann = annotations[a]; + for (int k = 0; k < numKeypoints; ++k) { + const auto x = ann.keypoints[k*3 ]; + const auto y = ann.keypoints[k*3 + 1]; + const auto v = ann.keypoints[k*3 + 2]; + if (v == 0) { continue; } + + const auto i = std::min(std::max(0, (int)std::round(x)), w - 1); + const auto j = std::min(std::max(0, (int)std::round(y)), h - 1); + + if (occupied[k*h*w + j*w + i] > 0.0f) { + ann.keypoints[k*3 + 2] = 0.0f; + } else { + scalarSquareAddSingle(occupied.data() + k*h*w, h, w, x, y, ann.jointScales[k], 1.0f); + } + } + } + + std::vector filtered; + for (auto& ann : annotations) { + for (int k = 0; k < numKeypoints; ++k) { + if (ann.keypoints[k*3 + 2] > 0.0f) { + filtered.push_back(ann); + break; + } + } + } + return filtered; + + // Note: The original code sorts here on the score (descending), but + // we sort again later on so it's a bit quicker if we skip that here. 
+} + +void OpenPifPafPostprocessor::initTensors(int tensorWidth, int tensorHeight) { + H = tensorHeight; + W = tensorWidth; + H_hr = H * (int)stride; + W_hr = W * (int)stride; + + paf_stride_2 = H * W; + paf_stride_1 = 4 * paf_stride_2; + paf_stride_0 = 2 * paf_stride_1; + + pif_stride_1 = H * W; + pif_stride_0 = 4 * pif_stride_1; + + pifhr_stride_1 = W_hr; + pifhr_stride_0 = H_hr * pifhr_stride_1; + + indexField = makeIndexField(H, W); + indexField_hr = makeIndexField(H_hr, W_hr); + paf = std::vector(19 * 2 * 4 * H * W); + pif = std::vector(17 * 4 * H * W); + + const int shape = C * H_hr * W_hr; + targetsCoreOnly = std::vector(shape); + targets = std::vector(shape); + scales = std::vector(shape); + ns = std::vector(shape); +} + +ai_app::Object_detection::Result OpenPifPafPostprocessor::postprocess_0_8( + int inputWidth, int inputHeight, + int tensorWidth, int tensorHeight, + const float* pif_c, + const float* pif_r, + const float* pif_s, + const float* paf_c, + const float* paf_r1, + const float* paf_r2, + const float* paf_b1, + const float* paf_b2) +{ + this->inputWidth = inputWidth; + this->inputHeight = inputHeight; + + // Allocate the intermediate tensors the first time or when the size changes. + if (W != tensorWidth || H != tensorHeight) { + initTensors(tensorWidth, tensorHeight); + } + + normalizePAF(paf_c, paf_r1, paf_r2, paf_b1, paf_b2); + normalizePIF(pif_c, pif_r, pif_s); + + const auto ti = targetIntensities(pif); + const auto pifhr = std::get<0>(ti); + const auto pifhrScales = std::get<1>(ti); + const auto pifhrCore = std::get<2>(ti); + + /* + // For debugging + for (int c = 0; c < 17; ++c) { + for (int y = 0; y < H_hr; ++y) { + for (int x = 0; x < W_hr; ++x) { + printf("%f, ", pifhrCore[c*136*248 + y*248 + x]); + } + } + printf("\n"); + } + */ + + // This returns two lists that each contain 19 tensors of shape (7, ?) + // where the second dimension can vary in size (depends on thresholds). 
+ const auto pt = scorePafTarget(paf, pifhr); + const auto pafForward = std::get<0>(pt); + const auto pafBackward = std::get<1>(pt); + + /* + // For debugging + printf("pafForward:\n"); + for (auto& i : pafForward) { + for (auto j : i) { printf("%f, ", j); } printf("\n"); + } + printf("\npafBackward:\n"); + for (auto i : pafBackward) { + for (auto& j : i) { printf("%f, ", j); } printf("\n"); + } + */ + + auto annotations = decodeAnnotations(pifhr, pifhrScales, pifhrCore, pafForward, pafBackward); + + // Scale to input size + const float output_stride = 8.0f; + for (auto& ann : annotations) { + for (int k = 0; k < numKeypoints; ++k) { + ann.keypoints[k*3 ] *= output_stride; + ann.keypoints[k*3 + 1] *= output_stride; + ann.jointScales[k] *= output_stride; + } + } + + // Non-maximum suppression + if (!annotations.empty()) { + annotations = softNMS(annotations); + } + + // Threshold + std::vector thresholded; + for (auto& ann : annotations) { + for (int k = 0; k < numKeypoints; ++k) { + if (ann.keypoints[k*3 + 2] < keypointThreshold) { + ann.keypoints[k*3 + 2] = 0.0f; + } + } + if (ann.score() >= instanceThreshold) { + thresholded.push_back(ann); + } + } + + std::sort(thresholded.begin(), thresholded.end(), [] (const Annotation& a, const Annotation& b) { + return a.score() > b.score(); + }); + + // Convert to normalized coordinates + for (auto& ann : thresholded) { + for (int k = 0; k < numKeypoints; ++k) { + ann.keypoints[k*3 ] /= inputWidth; + ann.keypoints[k*3 + 1] /= inputHeight; + } + } + + /* + // For debugging + for (auto ann : thresholded) { + printf("Keypoints:\n"); + for (auto k : ann.keypoints) { + printf("%f, ", k); + } + printf("\nJoint scales:\n"); + for (auto k : ann.jointScales) { + printf("%f, ", k); + } + printf("\n"); + } + */ + + ai_app::Object_detection::Result result; + result.success = true; + for (auto& ann : thresholded) { + ai_app::Landmarks landmarks; + landmarks.type = "body_pose_pifpaf"; + + int minx = std::numeric_limits::max(), + miny = 
std::numeric_limits::max(), + maxx = -std::numeric_limits::max(), + maxy = -std::numeric_limits::max(); + + for (int k = 0; k < numKeypoints; ++k) { + const int x = ann.keypoints[k*3 ] * 10000; // FIXME: MAGIC NUMBER. + const int y = ann.keypoints[k*3 + 1] * 10000; + const auto v = ann.keypoints[k*3 + 2]; + + if (v > 0.0f) { + if (x < minx) { minx = x; } + if (x > maxx) { maxx = x; } + if (y < miny) { miny = y; } + if (y > maxy) { maxy = y; } + } + + ai_app::Landmark landmark; + landmark.confidence = v; + landmark.position.x = x; + landmark.position.y = y; + landmarks.points.push_back(landmark); + } + + ai_app::Object_detection::Result::Item item; + item.confidence = ann.score(); + item.class_index = 1; + item.bounding_box.origin.x = minx; + item.bounding_box.origin.y = miny; + item.bounding_box.size.x = maxx - minx; + item.bounding_box.size.y = maxy - miny; + item.landmarks = landmarks; + + result.items.push_back(item); + } + return result; +} + +} \ No newline at end of file diff --git a/src/pifpaf_decoder/openpifpaf_postprocessor.hpp b/src/pifpaf_decoder/openpifpaf_postprocessor.hpp new file mode 100644 index 00000000..9124b1f1 --- /dev/null +++ b/src/pifpaf_decoder/openpifpaf_postprocessor.hpp @@ -0,0 +1,188 @@ +#pragma once + +#include +#include +#include +#include + +#include "object_detection.hpp" + +namespace lpdnn::aiapp_impl { + +/** + Post-processing logic for OpenPifPaf + + \note This object caches the big tensors to save on memory allocations. + This means it's best to make one instance of this class and keep using it. + For the most efficient results, make sure the input tensors are always the + same width and height. + + \note This code is not threadsafe. Don't call it from multiple threads at + the same time. If you must use multiple threads, give each thread its own + instance of this class. + */ +class OpenPifPafPostprocessor +{ +public: + OpenPifPafPostprocessor() : H(0), W(0) { } + + /** + Applies post-processing to OpenPifPaf output. 
+ + \param inpWidth Width of the input tensor in pixels. + \param inpHeight Height of the input tensor in pixels. + \param tensorWidth Width of the neural network's PIF and PAF outputs. + \param tensorHeight Height of the neural network's PIF and PAF outputs. + */ + ai_app::Object_detection::Result postprocess_0_8( + int inpWidth, int inpHeight, int tensorWidth, int tensorHeight, + const float* pif_c, // 17xHxW + const float* pif_r, // 34xHxW + const float* pif_s, // 17xHxW + const float* paf_c, // 19xHxW + const float* paf_r1, // 38xHxW + const float* paf_r2, // 38xHxW + const float* paf_b1, // 19xHxW + const float* paf_b2 // 19xHxW + ); + +public: + static const int numKeypoints = 17; + static const int numBones = 19; + + // Connections between the different keypoint indices. + // Note: these start at 1, not 0! + static const int bones[19][2]; + +private: + struct Annotation { + // Array of `numKeypoints * 3` elements: + // - element `i*3 + 0` is x-coordinate (normalized) + // - element `i*3 + 1` is y-coordinate (normalized) + // - element `i*3 + 2` is confidence score + std::vector keypoints; + + std::vector jointScales; + + Annotation(int j, float x, float y, float v) : keypoints(numKeypoints * 3), + jointScales(numKeypoints) + { + keypoints[j*3 ] = x; + keypoints[j*3 + 1] = y; + keypoints[j*3 + 2] = v; + } + + /** + Overall confidence score for the entire skeleton. + */ + [[nodiscard]] float score() const { + float maxv = 0.0f; + float vv = 0.0f; + for (int k = 0; k < numKeypoints; ++k) { + auto v = keypoints[k*3 + 2]; + maxv = std::max(maxv, v); + vv += v * v; + } + return 0.1f * maxv + 0.9f * vv / (float)numKeypoints; + } + }; + + // 0: confidence of origin + // 1: connection index + // 2: forward? 
+ // 3: joint index 1 (not corrected for forward) + // 4: joint index 2 (not corrected for forward) + typedef std::tuple frontier_t; + typedef std::tuple, std::vector, std::vector> Target_intensity; + typedef std::tuple>, std::vector>> Paf_target; + typedef std::tuple Pifhr_seed; + typedef std::tuple Connection; + +private: + void initTensors(int tensorWidth, int tensorHeight); + + void normalizePAF(const float* intensityFields, + const float* j1Fields, + const float* j2Fields, + const float* j1FieldsLogb, + const float* j2FieldsLogb); + + void normalizePIF(const float* jointIntensityFields, + const float* jointFields, + const float* scaleFields); + + Target_intensity + targetIntensities(const std::vector& pif, + float v_th = 0.1f, + bool coreOnly = false); + + Paf_target + scorePafTarget(const std::vector& pafvec, + const std::vector& pifhr, + float pifhr_floor = 0.01f, + float score_th = 0.1f) const; + + std::vector + pifhrSeeds(const std::vector& pifhrScales, + const std::vector& pifhrCore); + + static std::vector pafCenter(const std::vector& paf_field, + float x, float y, float sigma = 1.0f); + + static Connection + growConnection(float x, float y, const std::vector& paf_field_); + + static std::vector frontier(Annotation& ann); + + frontier_t frontierIter(Annotation& ann); + + void grow(Annotation& ann, + const std::vector>& pafForward, + const std::vector>& pafBackward, + float th = 0.1f); + + void fillJointScales(Annotation& ann, + const std::vector& scales, + int fieldH, + int fieldW, + float hr_scale); + + std::vector + decodeAnnotations(const std::vector& pifhr, + const std::vector& pifhrScales, + const std::vector& pifhrCore, + const std::vector>& pafForward, + const std::vector>& pafBackward); + + std::vector softNMS(std::vector& annotations); + +private: + // Used to normalize the skeleton keypoint coordinates to [0, 1]. + float inputWidth, inputHeight; + + // Tensor dimensions (hr = high-resolution). 
+ int H, W, H_hr, W_hr; + + // Strides for tensor dimensions. + size_t paf_stride_2, paf_stride_1, paf_stride_0; + size_t pif_stride_1, pif_stride_0; + size_t pifhr_stride_1, pifhr_stride_0; + + // Temporary tensors. + std::vector indexField; // 2 x H x W + std::vector indexField_hr; // 2 x H x W + std::vector paf; // 19 x 2 x 4 x H x W + std::vector pif; // 17 x 4 x H x W + + // Filled in by targetIntensities(). + std::vector targetsCoreOnly; + std::vector targets; + std::vector scales; + std::vector ns; + + std::set> blockFrontier; + bool frontierActive; +}; + +} + From f897e945517597c5b88fd0926dced3bed4f5a738 Mon Sep 17 00:00:00 2001 From: ganler Date: Sat, 26 Jun 2021 02:35:36 +0800 Subject: [PATCH 2/4] refact: rm comments --- examples/cli.cpp | 15 ++++++++++----- src/pifpaf.cpp | 3 --- src/pifpaf_decoder/openpifpaf_postprocessor.cpp | 5 ----- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/examples/cli.cpp b/examples/cli.cpp index 80a54aa4..c37000ed 100644 --- a/examples/cli.cpp +++ b/examples/cli.cpp @@ -9,13 +9,14 @@ #define kSTREAM "stream" #define kPAF "paf" #define kPPN "ppn" +#define kPIFPAF "pifpaf" // Model Configuration. DEFINE_string(model, "../data/models/TinyVGG-V1-HW=256x384.uff", "Path to the model."); DEFINE_string( post, kPAF, - "Post-processing method. (`" kPAF "` -> [Part Affine Field] or `" kPPN "` -> [Pose Proposal Network])"); + "Post-processing method. 
(`" kPAF "` -> [Part Affine Field] or `" kPPN "` -> [Pose Proposal Network]) or `" kPIFPAF "` -> [Pif Paf]"); DEFINE_int32(w, 384, "Width of input image."); DEFINE_int32(h, 256, "Height of input image."); DEFINE_int32(max_batch_size, 8, "Max batch size for inference engine to execute."); @@ -37,18 +38,19 @@ namespace hp = hyperpose; class parser_variant { public: + using var_t = std::variant; template std::vector process(Container&& feature_map_containers) { return std::visit([&feature_map_containers](auto& arg) { return arg.process(feature_map_containers); }, m_parser); } - parser_variant(std::variant v) + parser_variant(var_t v) : m_parser(std::move(v)) { } private: - std::variant m_parser; + var_t m_parser; }; //parser_variant parser{parser}; @@ -142,14 +144,17 @@ int main(int argc, char** argv) }(); cli_log() << "DNN engine is built.\n"; - auto parser = parser_variant{ [&engine]() -> std::variant { + auto parser = parser_variant{ [&engine]() -> parser_variant::var_t { if (FLAGS_post == kPAF) return hp::parser::paf{}; if (FLAGS_post == kPPN) return hp::parser::pose_proposal(engine.input_size()); - cli_log() << "ERROR: Unknown post-processing flag: `" << FLAGS_post << "`. Use `paf` or `ppn` please.\n"; + if (FLAGS_post == kPIFPAF) + return hp::parser::pifpaf{}; + + cli_log() << "ERROR: Unknown post-processing flag: `" << FLAGS_post << "`. 
Use `paf`, `ppn` or `pifpaf` please.\n"; std::exit(-1); }() }; diff --git a/src/pifpaf.cpp b/src/pifpaf.cpp index ca4b71f6..1482f6bb 100644 --- a/src/pifpaf.cpp +++ b/src/pifpaf.cpp @@ -102,10 +102,8 @@ std::vector pifpaf::process(const feature_map_t& paf, const feature_map pif_conf.data(), pif_xy.data(), pif_s.data(), paf_conf.data(), paf_xy1.data(), paf_xy2.data(), paf_b1.data(), paf_b2.data()); -// std::cout << "Check pif[0]\t" << pif.view()[0] << std::endl; std::vector ret{}; ret.reserve(apires.items.size()); -// std::cout << apires.items.size() << "...size\n"; /* * @@ -125,7 +123,6 @@ HyperPose: Unified Topology dst.score = 1;// src.confidence; FIXME dst.x = src.position.x / 10000.; dst.y = src.position.y / 10000.; -// std::cout << dst.x << ' ' << dst.y << '\n'; dst.has_value = true; } }; diff --git a/src/pifpaf_decoder/openpifpaf_postprocessor.cpp b/src/pifpaf_decoder/openpifpaf_postprocessor.cpp index acf536fd..21049d96 100644 --- a/src/pifpaf_decoder/openpifpaf_postprocessor.cpp +++ b/src/pifpaf_decoder/openpifpaf_postprocessor.cpp @@ -489,11 +489,6 @@ OpenPifPafPostprocessor::scorePafTarget(const std::vector& pafvec, if (scores_b[i] > score_th) { mask_b.push_back(i); } } - // scored_backward.append(np.concatenate(( - // np.expand_dims(scores_b[mask_b], 0), - // fourds[1, 1:4][:, mask_b], - // fourds[0, 1:4][:, mask_b], - // ))) const size_t mask_b_size = mask_b.size(); std::vector result_b(7 * mask_b_size); for (size_t i = 0; i < mask_b_size; ++i) { From 85bfb5f9f96e1d40ee716681f86dc01b5d50ce9e Mon Sep 17 00:00:00 2001 From: ganler Date: Sat, 26 Jun 2021 12:01:41 +0800 Subject: [PATCH 3/4] feat: openpifpaf decoder finalized --- README.md | 1 + examples/cli.cpp | 8 +- examples/gen_serialized_engine.example.cpp | 2 +- ...ator_api_batched_images_pifpaf.example.cpp | 2 +- include/hyperpose/operator/parser/pifpaf.hpp | 13 +- src/human.cpp | 2 +- src/pifpaf.cpp | 111 +- src/pifpaf_decoder/aiapp.hpp | 200 +- src/pifpaf_decoder/image_based.hpp | 270 +-- 
src/pifpaf_decoder/math_helpers.cpp | 103 +- src/pifpaf_decoder/math_helpers.hpp | 17 +- src/pifpaf_decoder/object_detection.hpp | 72 +- .../openpifpaf_postprocessor.cpp | 1937 ++++++++--------- .../openpifpaf_postprocessor.hpp | 266 +-- 14 files changed, 1299 insertions(+), 1705 deletions(-) diff --git a/README.md b/README.md index 2db67570..7806a2d4 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,7 @@ We compare the prediction performance of HyperPose with [OpenPose 1.6](https://g | OpenPose (TinyVGG) | 34.7 MB | 384 x 256 | **124.925 FPS** | N/A | | OpenPose (MobileNet) | 17.9 MB | 432 x 368 | **84.32 FPS** | 8.5 FPS (TF-Pose) | | OpenPose (ResNet18) | 45.0 MB | 432 x 368 | **62.52 FPS** | N/A | +| OpenPifPaf (ResNet50) | 97.6 MB | 97 x 129 | **178.6 FPS** | 35.3 |

diff --git a/examples/cli.cpp b/examples/cli.cpp index c37000ed..dc4dd9f5 100644 --- a/examples/cli.cpp +++ b/examples/cli.cpp @@ -19,7 +19,7 @@ DEFINE_string( "Post-processing method. (`" kPAF "` -> [Part Affine Field] or `" kPPN "` -> [Pose Proposal Network]) or `" kPIFPAF "` -> [Pif Paf]"); DEFINE_int32(w, 384, "Width of input image."); DEFINE_int32(h, 256, "Height of input image."); -DEFINE_int32(max_batch_size, 8, "Max batch size for inference engine to execute."); +DEFINE_int32(max_batch_size, 4, "Max batch size for inference engine to execute."); // Execution Mode DEFINE_bool(imshow, true, "Whether to open an imshow window."); @@ -152,7 +152,7 @@ int main(int argc, char** argv) return hp::parser::pose_proposal(engine.input_size()); if (FLAGS_post == kPIFPAF) - return hp::parser::pifpaf{}; + return hp::parser::pifpaf(engine.input_size().height, engine.input_size().width); cli_log() << "ERROR: Unknown post-processing flag: `" << FLAGS_post << "`. Use `paf`, `ppn` or `pifpaf` please.\n"; std::exit(-1); @@ -184,6 +184,7 @@ int main(int argc, char** argv) if (FLAGS_runtime == kOPERATOR) { if (images.empty()) { // For CAP. + auto beg = clk_t::now(); auto writer = make_writer(); while (cap.isOpened()) { cv::Mat mat; @@ -227,6 +228,9 @@ int main(int argc, char** argv) break; } } + auto inference_time = std::chrono::duration(clk_t::now() - beg).count(); + std::cout << cap.get(cv::CAP_PROP_FRAME_COUNT) << " images got processed in " << inference_time << " ms, FPS = " + << 1000. * cap.get(cv::CAP_PROP_FRAME_COUNT) / inference_time << '\n'; } else { // For Vec. auto beg = clk_t::now(); // * TensorRT Inference. 
diff --git a/examples/gen_serialized_engine.example.cpp b/examples/gen_serialized_engine.example.cpp index f172a930..3f09dd98 100644 --- a/examples/gen_serialized_engine.example.cpp +++ b/examples/gen_serialized_engine.example.cpp @@ -12,7 +12,7 @@ DEFINE_string(output_name_list, "outputs/conf,outputs/paf", "The output node nam DEFINE_int32(input_height, 256, "Height of input image."); DEFINE_int32(input_width, 384, "Width of input image."); -DEFINE_int32(max_batch_size, 32, "The max batch size for the exported serialized model."); +DEFINE_int32(max_batch_size, 1, "The max batch size for the exported serialized model."); DEFINE_string(output_model, "", "Path to output serialized model."); diff --git a/examples/operator_api_batched_images_pifpaf.example.cpp b/examples/operator_api_batched_images_pifpaf.example.cpp index a187f1bb..5f57fa11 100644 --- a/examples/operator_api_batched_images_pifpaf.example.cpp +++ b/examples/operator_api_batched_images_pifpaf.example.cpp @@ -45,7 +45,7 @@ int main(int argc, char** argv) return tensorrt(tensorrt_serialized{ FLAGS_model_file }, { FLAGS_input_width, FLAGS_input_height }, batch.size()); }(); - hp::parser::pifpaf parser{}; + hp::parser::pifpaf parser(engine.input_size().height, engine.input_size().width); using clk_t = std::chrono::high_resolution_clock; auto beg = clk_t::now(); diff --git a/include/hyperpose/operator/parser/pifpaf.hpp b/include/hyperpose/operator/parser/pifpaf.hpp index 5da7990c..41eb83de 100644 --- a/include/hyperpose/operator/parser/pifpaf.hpp +++ b/include/hyperpose/operator/parser/pifpaf.hpp @@ -1,13 +1,16 @@ #pragma once -#include "paf.hpp" #include "../../utility/data.hpp" +#include "paf.hpp" namespace hyperpose::parser { -class pifpaf{ +class pifpaf { public: - explicit pifpaf() = default; + inline explicit pifpaf(int h, int w, float thresh = 0.1) + : m_net_h(h) + , m_net_w(w) + , m_keypoint_thresh(thresh){}; std::vector process(const feature_map_t& pif, const feature_map_t& paf); template 
std::vector process(C&& feature_map_containers) @@ -16,8 +19,10 @@ class pifpaf{ assert(feature_map_containers.size() == 2); return process(feature_map_containers[0], feature_map_containers[1]); } + private: - float m_keypoint_thresh = 0.001f; + int m_net_w, m_net_h; + float m_keypoint_thresh; }; } // namespace hyperpose \ No newline at end of file diff --git a/src/human.cpp b/src/human.cpp index dfc46893..7473a116 100644 --- a/src/human.cpp +++ b/src/human.cpp @@ -7,7 +7,7 @@ namespace hyperpose { void draw_human(cv::Mat& img, const human_t& human) { float n = 1, s = 0, w = 1, e = 0; - for(const auto& p : human.parts) + for (const auto& p : human.parts) if (p.has_value) { n = std::min(n, p.y); s = std::max(s, p.y); diff --git a/src/pifpaf.cpp b/src/pifpaf.cpp index 1482f6bb..4bfc7064 100644 --- a/src/pifpaf.cpp +++ b/src/pifpaf.cpp @@ -1,11 +1,12 @@ -#include #include "pifpaf_decoder/openpifpaf_postprocessor.hpp" +#include namespace hyperpose::parser { // TODO: Name ORDER! -std::vector pifpaf::process(const feature_map_t& paf, const feature_map_t& pif) { - // Helpful links (Chinese): +std::vector pifpaf::process(const feature_map_t& paf, const feature_map_t& pif) +{ + // Helpful links (Chinese):: // https://zhuanlan.zhihu.com/p/93896207 // https://zhuanlan.zhihu.com/p/68073113 // pif: [17, 5, h, w] => KEY POINTS; @@ -18,99 +19,35 @@ std::vector pifpaf::process(const feature_map_t& paf, const feature_map // TODO: OPTIMIZE THIS. 
lpdnn::aiapp_impl::OpenPifPafPostprocessor pp; + pp.keypointThreshold = m_keypoint_thresh; size_t h = pif.shape()[pif.shape().size() - 2]; size_t w = pif.shape().back(); - std::vector pif_conf, pif_xy, pif_s, paf_conf, paf_xy1, paf_xy2, paf_b1, paf_b2; - const auto tensor_sharding_to_vector = [](const feature_map_t& tensor, std::vector& vec, size_t dim2) { - size_t d0 = tensor.shape()[0]; - size_t d1 = tensor.shape()[1]; - size_t h = tensor.shape()[2]; - size_t w = tensor.shape()[3]; - for (int i = 0; i < d0; ++i) { - for (int j = 0; j < h; ++j) { - for (int k = 0; k < w; ++k) { - vec.push_back(tensor.view()[ - i * d1 * w * h + - dim2 * h * w + - j * w + - k - ]); - } - } - } - }; + std::vector pif_vec{}, paf_vec{}; - const auto tensor_sharding_to_offset_vector = [](const feature_map_t& tensor, std::vector& vec, size_t dimx, size_t dimy) { + const auto raw_copy = [](const feature_map_t& tensor, std::vector& vec) { size_t d0 = tensor.shape()[0]; size_t d1 = tensor.shape()[1]; size_t h = tensor.shape()[2]; size_t w = tensor.shape()[3]; - for (int i = 0; i < d0; ++i) { - // X first & Then Y - for (int j = 0; j < h; ++j) { - for (int k = 0; k < w; ++k) { - vec.push_back(tensor.view()[ - i * d1 * w * h + - dimx * h * w + - j * w + - k - ]); - } - } - - for (int j = 0; j < h; ++j) { - for (int k = 0; k < w; ++k) { - vec.push_back(tensor.view()[ - i * d1 * w * h + - dimy * h * w + - j * w + - k - ]); - } - } + const size_t total_size = d0 * d1 * h * w; + vec.reserve(total_size); + for (size_t i = 0; i < total_size; ++i) { + vec.push_back(tensor.view()[i]); } }; - pif_conf.reserve(17 * h * w); - tensor_sharding_to_vector(pif, pif_conf, 0); - - pif_xy.reserve(17 * 2 * h * w); - tensor_sharding_to_offset_vector(pif, pif_xy, 1, 2); - - pif_s.reserve(17 * h * w); - tensor_sharding_to_vector(pif, pif_s, 4); - - // [19, 9, h, w] -> [conf, p1, p2, b1, b2, ...] 
- paf_conf.reserve(19 * h * w); - tensor_sharding_to_vector(paf, paf_conf, 0); - - paf_xy1.reserve(2 * 19 * h * w); - tensor_sharding_to_offset_vector(paf, paf_xy1, 1, 2); - - paf_xy2.reserve(2 * 19 * h * w); - tensor_sharding_to_offset_vector(paf, paf_xy2, 3, 4); - - paf_b1.reserve(19 * h * w); - tensor_sharding_to_vector(paf, paf_b1, 5); - - paf_b2.reserve(19 * h * w); - tensor_sharding_to_vector(paf, paf_b2, 6); + raw_copy(pif, pif_vec); + raw_copy(paf, paf_vec); // TODO: RECOVER THE INP{W, H}; - auto apires = pp.postprocess_0_8(640, 427, w, h, - pif_conf.data(), pif_xy.data(), pif_s.data(), - paf_conf.data(), paf_xy1.data(), paf_xy2.data(), paf_b1.data(), paf_b2.data()); + auto apires = pp.postprocess(m_net_w, m_net_h, w, h, pif_vec, paf_vec); std::vector ret{}; ret.reserve(apires.items.size()); - - /* - * - OpenPifPaf COCO Topology: https://miro.medium.com/max/366/0*KFrFQVj3OoGAtt6o.png -HyperPose: Unified Topology - * - */ + // OpenPifPaf COCO Topology: https://miro.medium.com/max/366/0*KFrFQVj3OoGAtt6o.png + // HyperPose: Unified Topology + // NOTE: This step is to convert pifpaf topology to hyperpose topology. for (auto&& item : apires.items) { if (item.landmarks.points.empty()) @@ -120,9 +57,9 @@ HyperPose: Unified Topology auto p2p = [this](const auto& src, auto& dst) { if (src.confidence > 0.) 
{ - dst.score = 1;// src.confidence; FIXME - dst.x = src.position.x / 10000.; - dst.y = src.position.y / 10000.; + dst.score = 1; // src.confidence; FIXME + dst.x = src.position.x / (float)m_net_w; + dst.y = src.position.y / (float)m_net_h; dst.has_value = true; } }; @@ -139,12 +76,14 @@ HyperPose: Unified Topology }; for (size_t i = 0; i < from_index.size(); ++i) { - p2p(from[from_index[i]], to[i+2]); + p2p(from[from_index[i]], to[i + 2]); } if (to[2].has_value && to[5].has_value) { - to[1].x = (to[2].x + to[5].x) / 2;; - to[1].y = (to[2].y + to[5].y) / 2;; + to[1].x = (to[2].x + to[5].x) / 2; + ; + to[1].y = (to[2].y + to[5].y) / 2; + ; to[1].has_value = true; to[1].score = (to[2].score + to[5].score) / 2; } diff --git a/src/pifpaf_decoder/aiapp.hpp b/src/pifpaf_decoder/aiapp.hpp index 1beb0774..85c75a09 100644 --- a/src/pifpaf_decoder/aiapp.hpp +++ b/src/pifpaf_decoder/aiapp.hpp @@ -15,102 +15,104 @@ namespace lpdnn { namespace ai_app { -/// Aiapp Blob -/// This could be improved to allow referring to existing data -/// thus avoding unneeded data-copy, for example by using shared_ptr. -struct Blob { - /// Data dimensions. Mandatory if the blob represents a tensor. - std::vector dim; - - /// Data. Mandatory if the blob represents a tensor. - std::vector data; - - /// Optional raw representation. - std::vector raw; - - /// Optional CBOR representation when data is structured. - std::vector cbor; - - /// Optional additional information - /// (eg, description of internal representation: "NCHW,8bits,dp3"). - std::string info; -}; - -/// AI-App interface -class Aiapp { - public: - virtual ~Aiapp() {} - - /// @return the ai-class id for this aiapp - virtual const char* class_id() const = 0; - - /// @return the implementation id for this aiapp - virtual const char* impl_id() const = 0; - - /// Initialization options - /// \param cfg: configuration string, typically in JSON format. 
- /// \return: true if success - virtual bool init(const std::string& cfg) = 0; - - /// Set runtime options for the specified component - /// \param opt: runtime options, typically in JSON format. - /// \param name: subcomponent name - /// \return: true if success - virtual bool set_options(const std::string& opt, - const std::string& name = "") = 0; - - /// Introspection methods - /// \{ - - /// \return: names of all direct subcomponents of the specified component - virtual std::vector components( - const std::string& name = "") const = 0; - - /// \return output(s) of the specified component - virtual std::vector output(const std::string& name = "") const = 0; - - /// \return metrics of the specified component and all its subcomponents - virtual std::string metrics(const std::string& name = "") const = 0; - - /// set end-of-execution at the end of the specified component - /// if name is empty any exit-point previously set is removed - virtual bool set_exit_after(const std::string& name = "") = 0; - - /// \} -}; - -/// AiApp standard processing components -/// Each ai-app can contain other sub-components. -/// Each subcomponent can be identified by a pathname, for example: -/// "preprocessing.normalize" -/// "inference.net1.conv23" -struct Component { - /// Standard component names. Their use is not mandatory but - /// allows an ai-app to be supported by existing tools. - static constexpr char const* preprocessing = "preprocessing"; - static constexpr char const* inference = "inference"; - static constexpr char const* postprocessing = "postprocessing"; - - /// Ai-app interface parameters - static constexpr char const* interface = "interface"; - - /// Name separator in a component pathname string. 
- /// Component names can't contain the separator except possibly for the leafs - static constexpr char separator = '.'; - - /// Concatenate component names in a component pathname - static std::string join(const std::string& path, const std::string& comp) { - return path + separator + comp; - } -}; - -/// AiApp Metrics -struct Metrics { - /// Standard metrics. All timings are in microseconds. - static constexpr char const* init_time = "init_time"; - static constexpr char const* inference_time = "inference_time"; - static constexpr char const* inference_cpu_time = "inference_cpu_time"; -}; - -} // namespace ai_app -} // namespace lpdnn + /// Aiapp Blob + /// This could be improved to allow referring to existing data + /// thus avoding unneeded data-copy, for example by using shared_ptr. + struct Blob { + /// Data dimensions. Mandatory if the blob represents a tensor. + std::vector dim; + + /// Data. Mandatory if the blob represents a tensor. + std::vector data; + + /// Optional raw representation. + std::vector raw; + + /// Optional CBOR representation when data is structured. + std::vector cbor; + + /// Optional additional information + /// (eg, description of internal representation: "NCHW,8bits,dp3"). + std::string info; + }; + + /// AI-App interface + class Aiapp { + public: + virtual ~Aiapp() {} + + /// @return the ai-class id for this aiapp + virtual const char* class_id() const = 0; + + /// @return the implementation id for this aiapp + virtual const char* impl_id() const = 0; + + /// Initialization options + /// \param cfg: configuration string, typically in JSON format. + /// \return: true if success + virtual bool init(const std::string& cfg) = 0; + + /// Set runtime options for the specified component + /// \param opt: runtime options, typically in JSON format. 
+ /// \param name: subcomponent name + /// \return: true if success + virtual bool set_options(const std::string& opt, + const std::string& name = "") + = 0; + + /// Introspection methods + /// \{ + + /// \return: names of all direct subcomponents of the specified component + virtual std::vector components( + const std::string& name = "") const = 0; + + /// \return output(s) of the specified component + virtual std::vector output(const std::string& name = "") const = 0; + + /// \return metrics of the specified component and all its subcomponents + virtual std::string metrics(const std::string& name = "") const = 0; + + /// set end-of-execution at the end of the specified component + /// if name is empty any exit-point previously set is removed + virtual bool set_exit_after(const std::string& name = "") = 0; + + /// \} + }; + + /// AiApp standard processing components + /// Each ai-app can contain other sub-components. + /// Each subcomponent can be identified by a pathname, for example: + /// "preprocessing.normalize" + /// "inference.net1.conv23" + struct Component { + /// Standard component names. Their use is not mandatory but + /// allows an ai-app to be supported by existing tools. + static constexpr char const* preprocessing = "preprocessing"; + static constexpr char const* inference = "inference"; + static constexpr char const* postprocessing = "postprocessing"; + + /// Ai-app interface parameters + static constexpr char const* interface = "interface"; + + /// Name separator in a component pathname string. + /// Component names can't contain the separator except possibly for the leafs + static constexpr char separator = '.'; + + /// Concatenate component names in a component pathname + static std::string join(const std::string& path, const std::string& comp) + { + return path + separator + comp; + } + }; + + /// AiApp Metrics + struct Metrics { + /// Standard metrics. All timings are in microseconds. 
+ static constexpr char const* init_time = "init_time"; + static constexpr char const* inference_time = "inference_time"; + static constexpr char const* inference_cpu_time = "inference_cpu_time"; + }; + +} // namespace ai_app +} // namespace lpdnn diff --git a/src/pifpaf_decoder/image_based.hpp b/src/pifpaf_decoder/image_based.hpp index 938cedaa..914e0f7c 100644 --- a/src/pifpaf_decoder/image_based.hpp +++ b/src/pifpaf_decoder/image_based.hpp @@ -9,132 +9,144 @@ #include "aiapp.hpp" -namespace lpdnn::ai_app { - -/// 2-dimensional size -struct Dim2d { - int x; - int y; -}; - -/// Rectangle -struct Rect { - Dim2d origin; - Dim2d size; - - [[nodiscard]] bool empty() const { return size.x <= 0 || size.y <= 0; } -}; - -/// Landmarks -struct Landmark { - Dim2d position; - float confidence; /// Negative value if N/A -}; - -struct Landmarks { - /// Landmark specification identifier - std::string type; - /// Landmark points - std::vector points; -}; - -/// Image representation. -/// The data of a RAW image consists of *y scanlines of *x pixels, -/// with each pixel consisting of N interleaved 8-bit components; the first -/// pixel pointed to is top-left-most in the image. There is no padding between -/// image scanlines or between pixels, regardless of format. The number of -/// components N is 3 for RGB images, 4 for RGBA, 1 for grayscale. -/// Support for 8bits RGB format is MANDATORY for all image-processing AiApps. -/// An image can be constructed from a std::vector, or a std::string -/// or raw data pointer and size. When passing rvalues vector or strings, the -/// image will take ownership of the data, otherwise will just keep reference. 
-class Image { - protected: - /// Contains image data if we have ownership of it - std::vector _image_content; - - public: - /// Image format - enum class Format { - raw_grayscale = 1, /// 8bits grayscale - raw_rgb8 = 3, /// 8bits RGB *MANDATORY* - raw_rgba8 = 4, /// 8bits RGBA - - encoded = 256, /// Standard JPEG/BMP/PNG/TIFF format - - custom = 512 /// Custom format. Use attributes field for more details. - }; - - /// Don't take data ownership. - /// img_dim parameter can be omitted in case of encoded images since - /// this information will be extracted from the image content itself. - Image(Format img_format, const std::vector& data, Dim2d img_dim = {}) - : Image(img_format, data.data(), data.size(), img_dim) {} - - /// Take data ownership - Image(Format img_format, std::vector&& data, Dim2d img_dim = {}) - : _image_content(std::move(data)), - format{img_format}, - dim(img_dim), - data{_image_content.data()}, - data_size{_image_content.size()} {} - - /// Don't take data ownership. - Image(Format img_format, const std::string& data, Dim2d img_dim = {}) - : Image(img_format, (uint8_t*)data.c_str(), data.size(), img_dim) {} - - /// Take data ownership - Image(Format img_format, std::string&& data, Dim2d img_dim = {}) - : Image(img_format, - std::vector((uint8_t*)data.c_str(), - (uint8_t*)data.c_str() + data.size()), - img_dim) { - data.clear(); - } - - /// Don't take data ownership - /// img_data_size is mandatory in case of encoded images. - Image(Format img_format, const uint8_t* img_data, size_t img_data_size, - Dim2d img_dim = {}) - : format{img_format}, - dim(img_dim), - data{img_data}, - data_size{img_data_size} {} - - /// Utility factory methods - static Image encoded(const std::vector& data) { - return Image(Format::encoded, data); - } - - /// Image format - Format format; - - /// Image dimensions (for raw images) - Dim2d dim; - - /// Region of interest inside the image (all if empty) - Rect roi{}; - - /// Custom attributes. 
- /// This is ai-app specific and allows to specify custom data formats. - std::string attributes; - - /// Pointer to image data (no ownership of the data). - const uint8_t* data; - - /// Size of image data. Mandatory for encoded images. - size_t data_size; - - /// Additional optional information about the image. - /// May be required by some aiapps. - Landmarks landmarks; -}; - -/// Abstract image-based AiApp -class Image_based : virtual public Aiapp { - public: - /// @return supported image formats (ordered by preference) - [[nodiscard]] virtual std::vector image_formats() const = 0; -}; - -} // namespace lpdnn +namespace lpdnn { +namespace ai_app { + + /// 2-dimensional size + struct Dim2d { + int x; + int y; + }; + + /// Rectangle + struct Rect { + Dim2d origin; + Dim2d size; + + bool empty() const { return size.x <= 0 || size.y <= 0; } + }; + + /// Landmarks + struct Landmark { + Dim2d position; + float confidence; /// Negative value if N/A + }; + + struct Landmarks { + /// Landmark specification identifier + std::string type; + /// Landmark points + std::vector points; + }; + + /// Image representation. + /// The data of a RAW image consists of *y scanlines of *x pixels, + /// with each pixel consisting of N interleaved 8-bit components; the first + /// pixel pointed to is top-left-most in the image. There is no padding between + /// image scanlines or between pixels, regardless of format. The number of + /// components N is 3 for RGB images, 4 for RGBA, 1 for grayscale. + /// Support for 8bits RGB format is MANDATORY for all image-processing AiApps. + /// An image can be constructed from a std::vector, or a std::string + /// or raw data pointer and size. When passing rvalues vector or strings, the + /// image will take ownership of the data, otherwise will just keep reference. 
+ class Image { + protected: + /// Contains image data if we have ownership of it + std::vector _image_content; + + public: + /// Image format + enum class Format { + raw_grayscale = 1, /// 8bits grayscale + raw_rgb8 = 3, /// 8bits RGB *MANDATORY* + raw_rgba8 = 4, /// 8bits RGBA + + encoded = 256, /// Standard JPEG/BMP/PNG/TIFF format + + custom = 512 /// Custom format. Use attributes field for more details. + }; + + /// Don't take data ownership. + /// img_dim parameter can be omitted in case of encoded images since + /// this information will be extracted from the image content itself. + Image(Format img_format, const std::vector& data, Dim2d img_dim = {}) + : Image(img_format, data.data(), data.size(), img_dim) + { + } + + /// Take data ownership + Image(Format img_format, std::vector&& data, Dim2d img_dim = {}) + : _image_content(std::move(data)) + , format{ img_format } + , dim(img_dim) + , data{ _image_content.data() } + , data_size{ _image_content.size() } + { + } + + /// Don't take data ownership. + Image(Format img_format, const std::string& data, Dim2d img_dim = {}) + : Image(img_format, (uint8_t*)data.c_str(), data.size(), img_dim) + { + } + + /// Take data ownership + Image(Format img_format, std::string&& data, Dim2d img_dim = {}) + : Image(img_format, + std::vector((uint8_t*)data.c_str(), + (uint8_t*)data.c_str() + data.size()), + img_dim) + { + data.clear(); + } + + /// Don't take data ownership + /// img_data_size is mandatory in case of encoded images. 
+ Image(Format img_format, const uint8_t* img_data, size_t img_data_size, + Dim2d img_dim = {}) + : format{ img_format } + , dim(img_dim) + , data{ img_data } + , data_size{ img_data_size } + { + } + + /// Utility factory methods + static Image encoded(const std::vector& data) + { + return Image(Format::encoded, data); + } + + /// Image format + Format format; + + /// Image dimensions (for raw images) + Dim2d dim; + + /// Region of interest inside the image (all if empty) + Rect roi{}; + + /// Custom attributes. + /// This is ai-app specific and allows to specify custom data formats. + std::string attributes; + + /// Pointer to image data (no ownership of the data). + const uint8_t* data; + + /// Size of image data. Mandatory for encoded images. + size_t data_size; + + /// Additional optional information about the image. + /// May be required by some aiapps. + Landmarks landmarks; + }; + + /// Abstract image-based AiApp + class Image_based : virtual public Aiapp { + public: + /// @return supported image formats (ordered by preference) + virtual std::vector image_formats() const = 0; + }; + +} // namespace ai_app +} // namespace lpdnn diff --git a/src/pifpaf_decoder/math_helpers.cpp b/src/pifpaf_decoder/math_helpers.cpp index a571780a..f7634da6 100644 --- a/src/pifpaf_decoder/math_helpers.cpp +++ b/src/pifpaf_decoder/math_helpers.cpp @@ -1,94 +1,25 @@ - #include "math_helpers.hpp" -#include - -#ifdef __APPLE__ -#define MATH_HELPERS_ACCELERATE 1 -#else -#define MATH_HELPERS_ACCELERATE 0 -#endif - -#if MATH_HELPERS_ACCELERATE -#include -#else -#include -#endif -void vfill(float*x, unsigned long n, float v) { -#if MATH_HELPERS_ACCELERATE - vDSP_vfill(&v, x, 1, n); -#else - // Slow version - for (unsigned long i = 0; i < n; ++i) { - x[i] = v; - } -#endif -} - -void vadd(const float *a, const float *b, float *c, unsigned long n) { -#if MATH_HELPERS_ACCELERATE - vDSP_vadd(a, 1, b, 1, c, 1, n); -#else - // Slow version - for (unsigned long i = 0; i < n; ++i) { - c[i] = 
a[i] + b[i]; - } -#endif -} - -void vexp(float *x, unsigned long n) { -#if MATH_HELPERS_ACCELERATE - int n_ = (int)n; - vvexpf(x, x, &n_); -#else - // Slow version - for (unsigned long i = 0; i < n; ++i) { - x[i] = std::exp(x[i]); - } -#endif -} - -void vmul(const float *a, const float *b, float *c, unsigned long n) { -#if MATH_HELPERS_ACCELERATE - vDSP_vmul(a, 1, b, 1, c, 1, n); -#else - // Slow version - for (unsigned long i = 0; i < n; ++i) { - c[i] = a[i] * b[i]; - } -#endif +void vfill(float* x, unsigned long n, float v) +{ + // Slow version + for (unsigned long i = 0; i < n; ++i) { + x[i] = v; + } } -void vsmul(const float *a, float b, float *c, unsigned long n) { -#if MATH_HELPERS_ACCELERATE - vDSP_vsmul(a, 1, &b, c, 1, n); -#else - // Slow version - for (unsigned long i = 0; i < n; ++i) { - c[i] = a[i] * b; - } -#endif +void vmul(const float* a, const float* b, float* c, unsigned long n) +{ + // Slow version + for (unsigned long i = 0; i < n; ++i) { + c[i] = a[i] * b[i]; + } } -float vargmax(const float *x, unsigned long n, int* i) { - assert(n > 0); -#if MATH_HELPERS_ACCELERATE - float maxValue = 0.0f; - vDSP_Length maxIndex = 0; - vDSP_maxvi(x, 1, &maxValue, &maxIndex, n); - *i = (int)maxIndex; - return maxValue; -#else - // Slow version - float maxValue = x[0]; - unsigned long maxIndex = 0; - for (unsigned long i = 1; i < n; ++i) { - if (x[i] > maxValue) { - maxValue = x[i]; - maxIndex = i; +void vsmul(const float* a, float b, float* c, unsigned long n) +{ + // Slow version + for (unsigned long i = 0; i < n; ++i) { + c[i] = a[i] * b; } - } - *i = (int)maxIndex; - return maxValue; -#endif } diff --git a/src/pifpaf_decoder/math_helpers.hpp b/src/pifpaf_decoder/math_helpers.hpp index d187bc51..15dcb087 100644 --- a/src/pifpaf_decoder/math_helpers.hpp +++ b/src/pifpaf_decoder/math_helpers.hpp @@ -1,21 +1,10 @@ #pragma once // x[i] = v -void vfill(float*x, unsigned long n, float v); - -// c[i] = a[i] + b[i] -void vadd(const float *a, const float *b, float *c, 
unsigned long n); - -// x[i] = exp(x[i]) -void vexp(float *x, unsigned long n); +void vfill(float* x, unsigned long n, float v); // c[i] = a[i] * b[i] -void vmul(const float *a, const float *b, float *c, unsigned long n); +void vmul(const float* a, const float* b, float* c, unsigned long n); // c[i] = a[i] * b -void vsmul(const float *a, float b, float *c, unsigned long n); - -// out = max(x) -// i = argmax(x) -float vargmax(const float *x, unsigned long n, int* i); - +void vsmul(const float* a, float b, float* c, unsigned long n); diff --git a/src/pifpaf_decoder/object_detection.hpp b/src/pifpaf_decoder/object_detection.hpp index 7a7bc673..91c8f3c0 100644 --- a/src/pifpaf_decoder/object_detection.hpp +++ b/src/pifpaf_decoder/object_detection.hpp @@ -9,40 +9,42 @@ #include "image_based.hpp" -namespace lpdnn::ai_app { - -/// Object detection AiApp -class Object_detection : virtual public Image_based { - public: - struct Result { - struct Item { - float confidence{}; - int class_index{}; - Rect bounding_box{}; - Landmarks landmarks; +namespace lpdnn { +namespace ai_app { + + /// Object detection AiApp + class Object_detection : virtual public Image_based { + public: + struct Result { + struct Item { + float confidence; + int class_index; + Rect bounding_box; + Landmarks landmarks; + }; + + bool success{}; + std::vector items; + }; + + /// Set minimum detectable object size + /// @return true if success + virtual bool set_min_size(Dim2d minSize) = 0; + + /// Set maximum detectable object size + /// @return true if success + virtual bool set_max_size(Dim2d maxSize) = 0; + + /// Perform inference. 
+ virtual Result execute(const Image& input) = 0; + + /// @return Names of classes + virtual std::vector classes() = 0; + + /// @return our aiapp class id + const char* class_id() const override { return ai_class_id; } + static constexpr char const* ai_class_id = "com_bonseyes::object_detection"; }; - bool success{}; - std::vector items; - }; - - /// Set minimum detectable object size - /// @return true if success - virtual bool set_min_size(Dim2d minSize) = 0; - - /// Set maximum detectable object size - /// @return true if success - virtual bool set_max_size(Dim2d maxSize) = 0; - - /// Perform inference. - virtual Result execute(const Image& input) = 0; - - /// @return Names of classes - virtual std::vector classes() = 0; - - /// @return our aiapp class id - [[nodiscard]] const char* class_id() const override { return ai_class_id; } - static constexpr char const* ai_class_id = "com_bonseyes::object_detection"; -}; - -} // namespace lpdnn +} // namespace ai_app +} // namespace lpdnn diff --git a/src/pifpaf_decoder/openpifpaf_postprocessor.cpp b/src/pifpaf_decoder/openpifpaf_postprocessor.cpp index 21049d96..d0a52617 100644 --- a/src/pifpaf_decoder/openpifpaf_postprocessor.cpp +++ b/src/pifpaf_decoder/openpifpaf_postprocessor.cpp @@ -1,1157 +1,930 @@ -#include +// Heavily modified from openpifpaf/cpp/example. 
+ #include +#include #include +#include +#include +#include +#include +#include #include +#include #include -#include -#include "openpifpaf_postprocessor.hpp" -#include "math_helpers.hpp" - -namespace lpdnn::aiapp_impl { - -const int OpenPifPafPostprocessor::bones[19][2] = { - {16, 14}, {14, 12}, {17, 15}, {15, 13}, {12, 13}, { 6, 12}, { 7, 13}, - { 6, 7}, { 6, 8}, { 7, 9}, { 8, 10}, { 9, 11}, { 2, 3}, { 1, 2}, - { 1, 3}, { 2, 4}, { 3, 5}, { 4, 6}, { 5, 7}, -}; +#include +#include +#include +#include -constexpr int C = 17; -constexpr float stride = 8.0f; -constexpr float seedThreshold = 0.2f; -constexpr float keypointThreshold = 0.001f; -constexpr float instanceThreshold = 0.2f; - -/* - Creates a (2, h, w) tensor where the first part is: - 0, 1, 2, 3, ..., w-1, - 0, 1, 2, 3, ..., w-1, - 0, 1, 2, 3, ..., w-1, - ... - and the second part is: - 0, 0, 0, 0, ..., 0, - 1, 1, 1, 1, ..., 1, - 2, 2, 2, 2, ..., 2, - ... - Used for normaling the PIFs and PAFs. -*/ -static std::vector makeIndexField(int h, int w) { - std::vector indexField(2 * h * w); - float* ptr = indexField.data(); - for (int y = 0; y < h; ++y) { - for (int x = 0; x < w; ++x) { - ptr[ y *w + x] = (float)x; - ptr[(y + h)*w + x] = (float)y; - } - } - return indexField; -} +#include "math_helpers.hpp" +#include "openpifpaf_postprocessor.hpp" -static void scalarSquareAddConstant(float* field, - int fieldH, - int fieldW, - const std::vector& x, - const std::vector& y, - const std::vector& width, - const std::vector& v) -{ - // minx_np = np.round(x_np - width_np).astype(np.int) - // minx_np = np.clip(minx_np, 0, field.shape[1] - 1) - std::vector minx(x.size()); - for (size_t i = 0; i < x.size(); ++i) { - minx[i] = std::min(fieldW - 1, std::max(0, (int)std::round(x[i] - width[i]))); - } +struct Occupancy { + // self.reduction = reduction + // self.min_scale_reduced = min_scale / reduction + constexpr static float reduction = 2.f; + constexpr static float min_scale_reduced = 4.f / reduction; + size_t d0, d1, d2; // 
c h w + std::vector occupancy_view; - // miny_np = np.round(y_np - width_np).astype(np.int) - // miny_np = np.clip(miny_np, 0, field.shape[0] - 1) - std::vector miny(y.size()); - for (size_t i = 0; i < y.size(); ++i) { - miny[i] = std::min(fieldH - 1, std::max(0, (int)std::round(y[i] - width[i]))); + Occupancy(size_t d0_, size_t d1_, size_t d2_) + : d0(d0_) + , d1(d1_) + , d2(d2_) + , occupancy_view(d0_ * d1_ * d2_) + { } - // maxx_np = np.round(x_np + width_np).astype(np.int) - // maxx_np = np.clip(maxx_np + 1, minx_np + 1, field.shape[1]) - std::vector maxx(x.size()); - for (size_t i = 0; i < x.size(); ++i) { - maxx[i] = std::min(fieldW, std::max(minx[i] + 1, (int)std::round(x[i] + width[i]) + 1)); - } + bool fuzz_get(size_t f, float y, float x) + { + if (f >= d0) + return true; - // maxy_np = np.round(y_np + width_np).astype(np.int) - // maxy_np = np.clip(maxy_np + 1, miny_np + 1, field.shape[0]) - std::vector maxy(y.size()); - for (size_t i = 0; i < y.size(); ++i) { - maxy[i] = std::min(fieldH, std::max(miny[i] + 1, (int)std::round(y[i] + width[i]) + 1)); - } + // scalar_nonzero_clipped_with_reduction + float xx = std::min((float)d2 - 1, std::max(0.f, x / reduction)); + float yy = std::min((float)d1 - 1, std::max(0.f, y / reduction)); - // for i in range(minx.shape[0]): - // for xx in range(minx[i], maxx[i]): - // for yy in range(miny[i], maxy[i]): - // field[yy, xx] += v[i] - for (size_t i = 0; i < minx.size(); ++i) { - for (int yy = miny[i]; yy < maxy[i]; ++yy) { - for (int xx = minx[i]; xx < maxx[i]; ++xx) { - field[yy * fieldW + xx] += v[i]; - } - } + return get(f, yy, xx); } -} -static void scalarSquareAddGauss(float* field, - int fieldH, - int fieldW, - const std::vector& x, - const std::vector& y, - const std::vector& sigma_, - const std::vector& v, - float truncate = 2.0f) -{ - // sigma_np = np.maximum(1.0, sigma_np) - // width_np = np.maximum(1.0, truncate * sigma_np) - auto sigma = sigma_; - std::vector width(sigma.size()); - for (size_t i = 0; i < 
sigma.size(); ++i) { - sigma[i] = std::max(1.0f, sigma[i]); - width[i] = std::max(1.0f, truncate * sigma[i]); + bool get(size_t d0_, size_t d1_, size_t d2_) + { + return occupancy_view[(d1 * d2) * d0_ + d2 * d1_ + d2_]; } - // NOTE: The minx, miny, maxx, maxxy code is the same as in scalarSquareAddConstant(). - // Could probably extract that and do it just once. - - // minx_np = np.round(x_np - width_np).astype(np.int) - // minx_np = np.clip(minx_np, 0, field.shape[1] - 1) - std::vector minx(x.size()); - for (size_t i = 0; i < x.size(); ++i) { - minx[i] = std::min(fieldW - 1, std::max(0, (int)std::round(x[i] - width[i]))); - } - - // miny_np = np.round(y_np - width_np).astype(np.int) - // miny_np = np.clip(miny_np, 0, field.shape[0] - 1) - std::vector miny(y.size()); - for (size_t i = 0; i < y.size(); ++i) { - miny[i] = std::min(fieldH - 1, std::max(0, (int)std::round(y[i] - width[i]))); - } - - // maxx_np = np.round(x_np + width_np).astype(np.int) - // maxx_np = np.clip(maxx_np + 1, minx_np + 1, field.shape[1]) - std::vector maxx(x.size()); - for (size_t i = 0; i < x.size(); ++i) { - maxx[i] = std::min(fieldW, std::max(minx[i] + 1, (int)std::round(x[i] + width[i]) + 1)); - } - - // maxy_np = np.round(y_np + width_np).astype(np.int) - // maxy_np = np.clip(maxy_np + 1, miny_np + 1, field.shape[0]) - std::vector maxy(y.size()); - for (size_t i = 0; i < y.size(); ++i) { - maxy[i] = std::min(fieldH, std::max(miny[i] + 1, (int)std::round(y[i] + width[i]) + 1)); + void set(size_t d0_, size_t d1_, size_t d2_) + { + occupancy_view[(d1 * d2) * d0_ + d2 * d1_ + d2_] = 1; } +}; - // for i in range(minx.shape[0]): - // for xx in range(minx[i], maxx[i]): - // deltax = xx - x[i] - // for yy in range(miny[i], maxy[i]): - // deltay = yy - y[i] - // vv = v[i] * np.exp(-0.5 * (deltax**2 + deltay**2) / sigma[i]**2) - // field[yy, xx] += vv - for (size_t i = 0; i < minx.size(); ++i) { - for (int xx = minx[i]; xx < maxx[i]; ++xx) { - float deltax = (float)xx - x[i]; +namespace lpdnn { 
+namespace aiapp_impl { + + constexpr int OpenPifPafPostprocessor::bones[19][2] = { + { 16, 14 }, + { 14, 12 }, + { 17, 15 }, + { 15, 13 }, + { 12, 13 }, + { 6, 12 }, + { 7, 13 }, + { 6, 7 }, + { 6, 8 }, + { 7, 9 }, + { 8, 10 }, + { 9, 11 }, + { 2, 3 }, + { 1, 2 }, + { 1, 3 }, + { 2, 4 }, + { 3, 5 }, + { 4, 6 }, + { 5, 7 }, + }; + + struct to_point { + int field_id; + bool possitve; + }; + + auto BY_SOURCE_MAP = [] { + // print(self.by_source) + // for i in range(17): + // for (end_i), (caf_i, connect) in self.by_source[i].items(): + // data = f'to_point{{{caf_i}, {"true" if connect else "false"}}}' + // print(f'smap[{i}][{end_i}] = {data};') + std::array>, 17> smap; + smap[0][1] = to_point{ 13, true }; + smap[0][2] = to_point{ 14, true }; + smap[1][2] = to_point{ 12, true }; + smap[1][0] = to_point{ 13, false }; + smap[1][3] = to_point{ 15, true }; + smap[2][1] = to_point{ 12, false }; + smap[2][0] = to_point{ 14, false }; + smap[2][4] = to_point{ 16, true }; + smap[3][1] = to_point{ 15, false }; + smap[3][5] = to_point{ 17, true }; + smap[4][2] = to_point{ 16, false }; + smap[4][6] = to_point{ 18, true }; + smap[5][11] = to_point{ 5, true }; + smap[5][6] = to_point{ 7, true }; + smap[5][7] = to_point{ 8, true }; + smap[5][3] = to_point{ 17, false }; + smap[6][12] = to_point{ 6, true }; + smap[6][5] = to_point{ 7, false }; + smap[6][8] = to_point{ 9, true }; + smap[6][4] = to_point{ 18, false }; + smap[7][5] = to_point{ 8, false }; + smap[7][9] = to_point{ 10, true }; + smap[8][6] = to_point{ 9, false }; + smap[8][10] = to_point{ 11, true }; + smap[9][7] = to_point{ 10, false }; + smap[10][8] = to_point{ 11, false }; + smap[11][13] = to_point{ 1, false }; + smap[11][12] = to_point{ 4, true }; + smap[11][5] = to_point{ 5, false }; + smap[12][14] = to_point{ 3, false }; + smap[12][11] = to_point{ 4, false }; + smap[12][6] = to_point{ 6, false }; + smap[13][15] = to_point{ 0, false }; + smap[13][11] = to_point{ 1, true }; + smap[14][16] = to_point{ 2, false }; + 
smap[14][12] = to_point{ 3, true }; + smap[15][13] = to_point{ 0, true }; + smap[16][14] = to_point{ 2, true }; + return smap; + }(); + + static const int C = 17; + static const float STRIDE = 8.0f; + static const float seedThreshold = 0.3f; // 0.5 + //static const float keypointThreshold = 0.15f; + static const float instanceThreshold = 0.2f; + + static void scalarSquareAddConstant(float* field, + int fieldH, + int fieldW, + const std::vector& x, + const std::vector& y, + const std::vector& width, + const std::vector& v) + { + // minx_np = np.round(x_np - width_np).astype(np.int) + // minx_np = np.clip(minx_np, 0, field.shape[1] - 1) + std::vector minx(x.size()); + for (size_t i = 0; i < x.size(); ++i) { + minx[i] = std::min(fieldW - 1, std::max(0, (int)std::round(x[i] - width[i]))); + } + + // miny_np = np.round(y_np - width_np).astype(np.int) + // miny_np = np.clip(miny_np, 0, field.shape[0] - 1) + std::vector miny(y.size()); + for (size_t i = 0; i < y.size(); ++i) { + miny[i] = std::min(fieldH - 1, std::max(0, (int)std::round(y[i] - width[i]))); + } + + // maxx_np = np.round(x_np + width_np).astype(np.int) + // maxx_np = np.clip(maxx_np + 1, minx_np + 1, field.shape[1]) + std::vector maxx(x.size()); + for (size_t i = 0; i < x.size(); ++i) { + maxx[i] = std::min(fieldW, std::max(minx[i] + 1, (int)std::round(x[i] + width[i]) + 1)); + } + + // maxy_np = np.round(y_np + width_np).astype(np.int) + // maxy_np = np.clip(maxy_np + 1, miny_np + 1, field.shape[0]) + std::vector maxy(y.size()); + for (size_t i = 0; i < y.size(); ++i) { + maxy[i] = std::min(fieldH, std::max(miny[i] + 1, (int)std::round(y[i] + width[i]) + 1)); + } + + // for i in range(minx.shape[0]): + // for xx in range(minx[i], maxx[i]): + // for yy in range(miny[i], maxy[i]): + // field[yy, xx] += v[i] + for (size_t i = 0; i < minx.size(); ++i) { for (int yy = miny[i]; yy < maxy[i]; ++yy) { - float deltay = (float)yy - y[i]; - float vv = v[i] * std::exp(-0.5f * (deltax*deltax + deltay*deltay) / 
(sigma[i]*sigma[i])); - field[yy * fieldW + xx] += vv; + for (int xx = minx[i]; xx < maxx[i]; ++xx) { + field[yy * fieldW + xx] += v[i]; + } } } } - /* - // For debugging - for (int y = 0; y < fieldH; ++y) { - for (int x = 0; x = fieldW) { return; } - if (miny >= fieldH) { return; } - - // field[miny:maxy, minx:maxx] += value - for (auto yy = miny; yy < maxy; ++yy) { - for (auto xx = minx; xx < maxx; ++xx) { - field[yy * fieldW + xx] += value; + static void scalarSquareAddGaussWitMax(float* field, + int fieldH, + int fieldW, + const std::vector& x, + const std::vector& y, + const std::vector& sigma_, + const std::vector& v, + float truncate, + float max_val = 1.0f) + { + // // ganler! + // assert(v.size() == x.size() == y.size() == sigma_.size()); + for (size_t i = 0; i < x.size(); ++i) { + float csigma = sigma_[i]; + float truncate_csigma = csigma * truncate; + float cx = x[i]; + float cy = y[i]; + float cv = v[i]; + const auto clip = [](float val, float low, float high) { + return std::max(low, std::min(high, val)); + }; + + // printf("%f, %f, %f, %f, %f\n", cx, cy, csigma, truncate_csigma, max_val); + const int64_t minx = clip(cx - truncate_csigma, 0, fieldW - 1); + const int64_t maxx = clip(cx + truncate_csigma + 1, minx + 1, fieldW); + const int64_t miny = clip(cy - truncate_csigma, 0, fieldH - 1); + const int64_t maxy = clip(cy + truncate_csigma + 1, miny + 1, fieldH); + // std::cout << minx << '\t' << maxx << '\t' << miny << '\t' << maxy << '\n'; + // printf("%lli, %lli, %lli, %lli\n", minx, maxx, miny, maxy); + + for (int64_t xx = minx; xx < maxx; ++xx) { + float deltax2 = (xx - cx) * (xx - cx); + for (int64_t yy = miny; yy < maxy; ++yy) { + float deltay2 = (yy - cy) * (yy - cy); + + if (deltax2 + deltay2 > truncate_csigma * truncate_csigma) { + continue; + } + + const auto approx_exp = [](float x) { + if (x > 2 || x < -2) + return 0.f; + x = 1.f + x / 8; + x *= x; + x *= x; + x *= x; + return x; + }; + float vv = (deltax2 < 0.25 && deltay2 < 0.25) ? 
cv : cv * approx_exp(-0.5 * (deltax2 + deltay2) / (csigma * csigma)); + field[yy * fieldW + xx] += vv; + field[yy * fieldW + xx] = std::min(max_val, field[yy * fieldW + xx]); + } + } } } -} - -/** - Combines the different PAF outputs into one big (19, 2, 4, h, w) tensor. - - The input tensors have the shape (19, h, w) except for j1/j2Fields, which - are (38, h, w). -*/ -void OpenPifPafPostprocessor::normalizePAF(const float* intensityFields, - const float* j1Fields, - const float* j2Fields, - const float* j1FieldsLogb, - const float* j2FieldsLogb) -{ - float* pafPtr = paf.data(); - - // Strides for the first dimension of the input tensors: - const size_t if_stride_0 = H * W; - const size_t j1f_stride_0 = H * W; - const size_t j1bf_stride_0 = H * W; - const size_t j2f_stride_0 = H * W; - const size_t j2bf_stride_0 = H * W; - - for (int i = 0; i < 19; ++i) { - // Copy the next h*w values from intensityFields. - size_t ifOffset = i * if_stride_0; - size_t outOffset = i * paf_stride_0; - memcpy(pafPtr + outOffset, intensityFields + ifOffset, H * W * sizeof(float)); - - // Copy the next 2 h*w values from j1Fields. - size_t j1fOffset = (i * 2) * j1f_stride_0; - outOffset += paf_stride_2; - memcpy(pafPtr + outOffset, j1Fields + j1fOffset, 2 * H * W * sizeof(float)); - - // Also add the index field to the values from j1Fields. - vadd(indexField.data(), j1Fields + j1fOffset, pafPtr + outOffset, 2 * H * W); - - // Copy the next h*w values from j1FieldsLogb and exponentiate. - size_t j1bfOffset = i * j1bf_stride_0; - outOffset += paf_stride_2 * 2; - memcpy(pafPtr + outOffset, j1FieldsLogb + j1bfOffset, H * W * sizeof(float)); - vexp(pafPtr + outOffset, H * W); - - // Copy the same h*w values from intensityFields again. - outOffset = i * paf_stride_0 + paf_stride_1; - memcpy(pafPtr + outOffset, intensityFields + ifOffset, H * W * sizeof(float)); - - // Copy the next 2 h*w values from j2Fields. 
- size_t j2fOffset = (i * 2) * j2f_stride_0; - outOffset += paf_stride_2; - memcpy(pafPtr + outOffset, j2Fields + j2fOffset, 2 * H * W * sizeof(float)); - - // Also add the index field to the values from j2Fields. - vadd(indexField.data(), j2Fields + j2fOffset, pafPtr + outOffset, 2 * H * W); - - // Copy the next h*w values from j2FieldsLogb and exponentiate. - size_t j2bfOffset = i * j2bf_stride_0; - outOffset += paf_stride_2 * 2; - memcpy(pafPtr + outOffset, j2FieldsLogb + j2bfOffset, H * W * sizeof(float)); - vexp(pafPtr + outOffset, H * W); - } - - // NOTE: We could do the exponentiation for j1/j2FieldsLogb in the Core ML - // model already. - /* - // For debugging - for (int y = 0; y < H; ++y) { - printf("%d: ", y); - for (int x = 0; x < W; ++x) { - printf("%f, ", paf[9*paf_stride_0 + 2*paf_stride_1 + 7*paf_stride_2 + y*W + x]); - } - printf("\n"); - } - */ -} - -/** - Combines the different PIF outputs into one big (17, 4, h, w) tensor. - - The input tensors have the shape (17, h, w) except for jointFields, which - is (34, h, w). -*/ -void OpenPifPafPostprocessor::normalizePIF(const float* jointIntensityFields, - const float* jointFields, - const float* scaleFields) -{ - float* pifPtr = pif.data(); - - // Strides for the first dimension of the input tensors: - const size_t iif_stride_0 = H * W; - const size_t jf_stride_0 = H * W; - const size_t sf_stride_0 = H * W; - - // The PyTorch code concatenates the following tensors: - // (17, 1, h, w) - // (17, 2, h, w) - // (17, 1, h, w) - // along the 2nd axis into one tensor of shape (17, 4, h, w). But the - // tensors from Core ML have the following shapes: - // (17, h, w) - // (34, h, w) - // (17, h, w) - // Fortunately, (17, 2, ...) has the same memory layout as (34, ...), - // so we can simply do a bunch of memcpy's. - - for (int i = 0; i < 17; ++i) { - // Copy the next h*w values from jointIntensityFields. 
- size_t jifOffset = i * iif_stride_0; - size_t outOffset = i * pif_stride_0; - memcpy(pifPtr + outOffset, jointIntensityFields + jifOffset, H * W * sizeof(float)); - - // Copy the next 2 h*w values from jointFields. - size_t jfOffset = (i * 2) * jf_stride_0; - outOffset += pif_stride_1; - memcpy(pifPtr + outOffset, jointFields + jfOffset, 2 * H * W * sizeof(float)); - - // Also add the index field to the values from jointFields. - vadd(indexField.data(), jointFields + jfOffset, pifPtr + outOffset, 2 * H * W); - - // Copy the next h*w values from scaleFields. - size_t sfOffset = i * sf_stride_0; - outOffset += pif_stride_1 * 2; - memcpy(pifPtr + outOffset, scaleFields + sfOffset, H * W * sizeof(float)); - } -} - -OpenPifPafPostprocessor::Target_intensity -OpenPifPafPostprocessor::targetIntensities(const std::vector& pif, - float v_th, bool coreOnly) -{ - const float pif_nn = 16.0f; - - const size_t targets_stride_0 = H_hr * W_hr; - const size_t scales_stride_0 = H_hr * W_hr; - const size_t ns_stride_0 = H_hr * W_hr; - - // These tensors need to be emptied out on each frame. - vfill(targetsCoreOnly.data(), targetsCoreOnly.size(), 0.0f); - vfill(targets.data(), targets.size(), 0.0f); - vfill(scales.data(), scales.size(), 0.0f); - vfill(ns.data(), ns.size(), 0.0f); - - std::vector v; - std::vector x; - std::vector y; - std::vector s; - - for (int i = 0; i < C; ++i) { - // Threshold pif[i, ...], which is a (4, h, w) tensor. Copy the values - // that are over the threshold into four vectors: v, x, y, s. Multiply - // x, y, s with the stride. 
- // - // v, x, y, s = p[:, p[0] > v_th] - // x = x * self.stride - // y = y * self.stride - // s = s * self.stride - v.clear(); - x.clear(); - y.clear(); - s.clear(); - const size_t pifOffset = i * pif_stride_0; - const size_t xOffset = pifOffset + pif_stride_1; - const size_t yOffset = xOffset + pif_stride_1; - const size_t sOffset = yOffset + pif_stride_1; - for (int j = 0; j < H*W; ++j) { - float p = pif[pifOffset + j]; - if (p > v_th) { - v.push_back(p); - x.push_back(pif[xOffset + j] * stride); - y.push_back(pif[yOffset + j] * stride); - s.push_back(pif[sOffset + j] * stride); + static void scalarSquareAddSingle(Occupancy& field, + int field_idx, + int fieldH, + int fieldW, + float x, + float y, + float width, + float reduction = 1.0, + float min_scaled_reduced = 0.0) + { + if (reduction != 1.0) { + x /= reduction; + y /= reduction; + width = std::max(min_scaled_reduced, width / reduction); + } + + // minx = max(0, int(round(x - width))) + // miny = max(0, int(round(y - width))) + auto minx = std::min(fieldW - 1, std::max(0, (int)(x - width))); + auto miny = std::min(fieldH - 1, std::max(0, (int)(y - width))); + + // maxx = max(minx + 1, min(field.shape[1], int(round(x + width)) + 1)) + // maxy = max(miny + 1, min(field.shape[0], int(round(y + width)) + 1)) + auto maxx = std::min(fieldW, std::max(minx + 1, std::min(fieldW, (int)(x + width) + 1))); + auto maxy = std::min(fieldH, std::max(miny + 1, std::min(fieldH, (int)(y + width) + 1))); + + // field[miny:maxy, minx:maxx] += value + for (auto yy = miny; yy < maxy; ++yy) { + for (auto xx = minx; xx < maxx; ++xx) { + field.set(field_idx, yy, xx); } } - - /* - // For debugging - printf("iteration: %d\n", i); - printf("v:\n"); for (auto n : v) printf("%f, ", n); printf("\n"); - printf("x:\n"); for (auto n : x) printf("%f, ", n); printf("\n"); - printf("y:\n"); for (auto n : y) printf("%f, ", n); printf("\n"); - printf("s:\n"); for (auto n : s) printf("%f, ", n); printf("\n"); - */ - - // Create a high-resolution 
confidence map for this keypoint. - - // v / pif_nn - std::vector v_over_pif_nn(v.size()); - vsmul(v.data(), 1.0f / pif_nn, v_over_pif_nn.data(), v.size()); - - // The original code computed the "core only" version in a separate step - // but that duplicates a bunch of work, so we do it at the same time. - const auto tco = targetsCoreOnly.data() + i * targets_stride_0; - scalarSquareAddGauss(tco, H_hr, W_hr, x, y, s, v_over_pif_nn, 0.5); - - // s * v - std::vector s_times_v(v.size()); - vmul(s.data(), v.data(), s_times_v.data(), v.size()); - - const auto t = targets.data() + i * targets_stride_0; - const auto scale = scales.data() + i * scales_stride_0; - const auto n = ns.data() + i * ns_stride_0; - scalarSquareAddGauss(t, H_hr, W_hr, x, y, s, v_over_pif_nn); - scalarSquareAddConstant(scale, H_hr, W_hr, x, y, s, s_times_v); - scalarSquareAddConstant(n, H_hr, W_hr, x, y, s, v); - } - - // m = ns > 0 - // scales[m] = scales[m] / ns[m] - for (size_t i = 0; i < scales.size(); ++i) { - const auto d = ns[i]; - if (d > 0) { scales[i] /= d; } } - return Target_intensity{ targets, scales, targetsCoreOnly }; -} - -OpenPifPafPostprocessor::Paf_target -OpenPifPafPostprocessor::scorePafTarget(const std::vector& pafvec, - const std::vector& pifhr, - float pifhr_floor, - float score_th) const -{ - std::vector> scored_forward; - std::vector> scored_backward; - - for (int c = 0; c < 19; ++c) { - // The PAF has shape (19, 2, 4, h, w). We're looking at one (2, 4, h, w) - // slice at a time in this loop. 
- const size_t pafOffset = c * paf_stride_0; - - // scores = np.min(fourds[:, 0], axis=0) - // mask = scores > score_th - // scores = scores[mask] - std::vector scores; - std::vector mask; - for (int i = 0; i < H * W; ++i) { - auto a = pafvec[pafOffset + i]; - auto b = pafvec[pafOffset + paf_stride_1 + i]; - auto score = std::min(a, b); - if (score > score_th) { - scores.push_back(score); - mask.push_back(i); + OpenPifPafPostprocessor::Target_intensity + OpenPifPafPostprocessor::targetIntensities(const std::vector& pif, + float v_th, bool coreOnly) + { + constexpr float PIF_NN = 16.0f; + + const size_t targets_stride_0 = H_hr * W_hr; + const size_t scales_stride_0 = H_hr * W_hr; + const size_t ns_stride_0 = H_hr * W_hr; + + // These tensors need to be emptied out on each frame. + vfill(targetsCoreOnly.data(), targetsCoreOnly.size(), 0.0f); + vfill(targets.data(), targets.size(), 0.0f); + vfill(scales.data(), scales.size(), 0.0f); + vfill(ns.data(), ns.size(), 0.0f); + + std::vector v; + std::vector x; + std::vector y; + std::vector s; + + for (int i = 0; i < C; ++i) { + // Threshold pif[i, ...], which is a (4, h, w) tensor. Copy the values + // that are over the threshold into four vectors: v, x, y, s. Multiply + // x, y, s with the stride. 
+ // + // v, x, y, s = p[:, p[0] > v_th] + // x = x * self.stride + // y = y * self.stride + // s = s * self.stride + v.clear(); + x.clear(); + y.clear(); + s.clear(); + const size_t pifOffset = i * pif_stride_0; + const size_t xOffset = pifOffset + pif_stride_1; + const size_t yOffset = xOffset + pif_stride_1; + const size_t sOffset = yOffset + pif_stride_1 * 2; + for (int j = 0; j < H * W; ++j) { + float p = pif[pifOffset + j]; + if (p > v_th) { + v.push_back(p); + x.push_back(pif[xOffset + j] * STRIDE); + y.push_back(pif[yOffset + j] * STRIDE); + s.push_back(std::max(1., 0.5 * pif[sOffset + j] * STRIDE)); + } } - } - // fourds = fourds[:, :, mask] - const size_t scores_size = scores.size(); - std::vector masked(2 * 4 * scores_size); - for (size_t i = 0; i < mask.size(); ++i) { - const auto m = mask[i]; - masked[i ] = pafvec[pafOffset + m]; - masked[i + scores_size ] = pafvec[pafOffset + paf_stride_2 + m]; - masked[i + scores_size*2] = pafvec[pafOffset + paf_stride_2*2 + m]; - masked[i + scores_size*3] = pafvec[pafOffset + paf_stride_2*3 + m]; - masked[i + scores_size*4] = pafvec[pafOffset + paf_stride_1 + m]; - masked[i + scores_size*5] = pafvec[pafOffset + paf_stride_1 + paf_stride_2 + m]; - masked[i + scores_size*6] = pafvec[pafOffset + paf_stride_1 + paf_stride_2*2 + m]; - masked[i + scores_size*7] = pafvec[pafOffset + paf_stride_1 + paf_stride_2*3 + m]; - } + /* + // For debugging + printf("iteration: %d\n", i); + printf("v:\n"); for (auto n : v) printf("%f, ", n); printf("\n"); + printf("x:\n"); for (auto n : x) printf("%f, ", n); printf("\n"); + printf("y:\n"); for (auto n : y) printf("%f, ", n); printf("\n"); + printf("s:\n"); for (auto n : s) printf("%f, ", n); printf("\n"); + */ - std::vector scores_b(scores_size); - if (pifhr_floor < 1.0f) { - // ij_b = np.round(fourds[0, 1:3] * self.stride).astype(np.int) - // ij_b[0] = np.clip(ij_b[0], 0, self._pifhr.shape[2] - 1) - // ij_b[1] = np.clip(ij_b[1], 0, self._pifhr.shape[1] - 1) - std::vector ij_b(2 * 
scores_size); - for (size_t i = 0; i < scores_size*2; ++i) { - const int v = (int)std::round(masked[scores_size + i] * stride); - ij_b[i] = std::min(std::max(0, v), i < scores_size ? W_hr - 1 : H_hr - 1); + // Create a high-resolution confidence map for this keypoint. + // std::cout << x.size() << '\t'<< y.size() << '\t'<< v.size() << '\t' << s.size() << '\n'; + // v / pif_nn + std::vector v_over_pif_nn(v.size()); + vsmul(v.data(), 1.0f / PIF_NN, v_over_pif_nn.data(), v.size()); + + // The original code computed the "core only" version in a separate step + // but that duplicates a bunch of work, so we do it at the same time. + const auto tco = targetsCoreOnly.data() + i * targets_stride_0; + scalarSquareAddGaussWitMax(tco, H_hr, W_hr, x, y, s, v_over_pif_nn, 1.0f, 1.0f); + + size_t cnt = 0; + for (size_t dd = 0; dd < targets_stride_0; ++dd) { + if (tco[dd] > 0.01) + ++cnt; } - - // pifhr_b = self._pifhr[j1i, ij_b[1], ij_b[0]] - // scores_b = scores * (pifhr_floor + (1.0 - pifhr_floor) * pifhr_b) - const auto j1i = bones[c][0] - 1; - for (size_t i = 0; i < scores_b.size(); ++i) { - const auto pifhr_b = pifhr[j1i * pifhr_stride_0 + ij_b[scores_size + i] * pifhr_stride_1 + ij_b[i]]; - scores_b[i] = scores[i] * (pifhr_floor + (1.0f - pifhr_floor) * pifhr_b); + // std::cout << targets_stride_0 << '\t' << i << '\t'<< cnt << '\t' << tco[0] << '\n'; + + // s * v + std::vector s_times_v(v.size()); + vmul(s.data(), v.data(), s_times_v.data(), v.size()); + + const auto t = targets.data() + i * targets_stride_0; + const auto scale = scales.data() + i * scales_stride_0; + const auto n = ns.data() + i * ns_stride_0; + scalarSquareAddGaussWitMax(t, H_hr, W_hr, x, y, s, v_over_pif_nn, 1.0f); + scalarSquareAddConstant(scale, H_hr, W_hr, x, y, s, s_times_v); + scalarSquareAddConstant(n, H_hr, W_hr, x, y, s, v); + } + + // m = ns > 0 + // scales[m] = scales[m] / ns[m] + for (size_t i = 0; i < scales.size(); ++i) { + const auto d = ns[i]; + if (d > 0) { + scales[i] /= d; } - } else { 
- scores_b = scores; } - - // mask_b = scores_b > score_th - std::vector mask_b; - for (int i = 0; i < (int)scores_b.size(); ++i) { - if (scores_b[i] > score_th) { mask_b.push_back(i); } + return Target_intensity{ targets, scales, targetsCoreOnly }; + } + + std::tuple + OpenPifPafPostprocessor::growConnectionBlend(float x, float y, float s, const std::array, 9>& paf_field) + { + // # source value + // paf_field = paf_center(paf_field, xy[0], xy[1], sigma=2.0) + // if paf_field.shape[1] == 0: + // return 0, 0, 0 + const float sigma = 2.0 * s; + const float sigma2 = 0.25 * s * s; + size_t score_1_i = 0, score_2_i = 0; + float score_1 = 0, score_2 = 0; + + const int paf_stride = paf_field.front().size(); + for (int i = 0; i < paf_stride; ++i) { + if ((paf_field[1][i] < x - sigma) || (paf_field[1][i] > x + sigma) || (paf_field[2][i] < y - sigma) || (paf_field[2][i] > y + sigma)) + continue; + float d2 = (paf_field[1][i] - x) * (paf_field[1][i] - x) + (paf_field[2][i] - y) * (paf_field[2][i] - y); + float score = std::exp(-0.5 * d2 / sigma2) * paf_field[0][i]; + if (score >= score_1) { + score_2_i = score_1_i; + score_2 = score_1; + score_1_i = i; + score_1 = score; + } else if (score > score_2) { + score_2_i = i; + score_2 = score; + } } - const size_t mask_b_size = mask_b.size(); - std::vector result_b(7 * mask_b_size); - for (size_t i = 0; i < mask_b_size; ++i) { - const auto m = mask_b[i]; - result_b[i ] = scores_b[m]; - result_b[i + mask_b_size ] = masked[scores_size*5 + m]; - result_b[i + mask_b_size*2] = masked[scores_size*6 + m]; - result_b[i + mask_b_size*3] = masked[scores_size*7 + m]; - result_b[i + mask_b_size*4] = masked[scores_size + m]; - result_b[i + mask_b_size*5] = masked[scores_size*2 + m]; - result_b[i + mask_b_size*6] = masked[scores_size*3 + m]; - } - scored_backward.push_back(result_b); - - std::vector scores_f(scores_size); - if (pifhr_floor < 1.0f) { - // ij_f = np.round(fourds[1, 1:3] * self.stride).astype(np.int) - // ij_f[0] = 
np.clip(ij_f[0], 0, self._pifhr.shape[2] - 1) - // ij_f[1] = np.clip(ij_f[1], 0, self._pifhr.shape[1] - 1) - std::vector ij_f(2 * scores_size); - for (size_t i = 0; i < scores_size*2; ++i) { - const int v = (int)std::round(masked[scores_size*5 + i] * stride); - ij_f[i] = std::min(std::max(0, v), i < scores_size ? W_hr - 1 : H_hr - 1); + if (score_1 == 0) + return { 0, 0, 0, 0 }; + + auto entry_1 = std::make_tuple(paf_field[3][score_1_i], paf_field[4][score_1_i], paf_field[6][score_1_i], paf_field[8][score_1_i]); + + auto [ex1, ey1, eb1, es1] = entry_1; + if (score_2 < 0.01 || score_2 < 0.5 * score_1) { + return { ex1, ey1, es1, score_1 * 0.5 }; + } + + // blend... + auto entry_2 = std::make_tuple(paf_field[3][score_2_i], paf_field[4][score_2_i], paf_field[6][score_2_i], paf_field[8][score_2_i]); + auto [ex2, ey2, eb2, es2] = entry_2; + + float blend_d2 = (ex1 - ex2) * (ex1 - ex2) + (ey1 - ey2) * (ey1 - ey2); + if (blend_d2 > ((es1 * es1) / 4)) { + return { ex1, ey1, es1, score_1 * 0.5 }; + } + + return { + // xysv + (score_1 * ex1 + score_2 * ex2) / (score_1 + score_2), + (score_1 * ey1 + score_2 * ey2) / (score_1 + score_2), + (score_1 * es1 + score_2 * es2) / (score_1 + score_2), + 0.5 * (score_1 + score_2), + }; + } + + using xysv = std::optional>; + + struct queue_item { // -score, xyv, start_i, end_i + template + queue_item(Args&&... 
args) + : data(std::make_tuple(std::forward(args)...)) + { + } + std::tuple data; + friend bool operator>(const queue_item& l, const queue_item& r) + { + return std::get<0>(l.data) >= std::get<0>(r.data); + } + friend bool operator<(const queue_item& l, const queue_item& r) + { + return std::get<0>(l.data) < std::get<0>(r.data); + } + }; + + void OpenPifPafPostprocessor::grow(Annotation& ann, + const FBContainer& pafForward, + const FBContainer& pafBackward) + { + // frontierActive = true; + // blockFrontier.clear(); + std::set> in_frontier{}; + std::priority_queue, std::greater> frontier; + + const auto add_to_frontier = [&](size_t start_i) { + for (const auto& [end_i, to_p] : BY_SOURCE_MAP[start_i]) { + int caf_i = to_p.field_id; + // std::cout << "----> " << start_i << '\t' << end_i << '\t' << caf_i << '\n'; + if (ann.keypoints[3 * end_i + 2] > 0.0) { + // std::cout << "CONTINUE start_i = " << start_i << '\n'; + continue; + } + // found! + if (in_frontier.cend() != in_frontier.find(std::make_pair(start_i, end_i))) { + // std::cout << "CONTINUE map already got you!\n"; + continue; + } + + float max_possible_score = std::sqrt(ann.keypoints[3 * start_i + 2]); + // std::cout << "put " << start_i << ' ' << end_i << "\tscore = " << max_possible_score << "\n"; + frontier.emplace(-max_possible_score, std::nullopt, start_i, end_i); + in_frontier.emplace(start_i, end_i); } - - // pifhr_f = self._pifhr[j2i, ij_f[1], ij_f[0]] - // scores_f = scores * (pifhr_floor + (1.0 - pifhr_floor) * pifhr_f) - const auto j2i = bones[c][1] - 1; - for (size_t i = 0; i < scores_f.size(); ++i) { - const auto pifhr_f = pifhr[j2i * pifhr_stride_0 + ij_f[scores_size + i] * pifhr_stride_1 + ij_f[i]]; - scores_f[i] = scores[i] * (pifhr_floor + (1.0f - pifhr_floor) * pifhr_f); + }; + + const auto frontier_get = [&]() -> std::optional { + while (!frontier.empty()) { + auto entry = frontier.top(); + frontier.pop(); + + { + auto [_a, _b, start_i, end_i] = entry.data; + // std::cout << "POP " << 
start_i << ' ' << end_i << " has val = " << std::get<1>(entry.data).has_value() << '\n'; + } + + if (std::get<1>(entry.data).has_value()) { + // std::cout << "RETURN \n"; + return entry; + } + + auto [_a, _b, start_i, end_i] = entry.data; + if (ann.keypoints[end_i * 3 + 2] > 0.0) + continue; + + // connection_value(self, ann, caf_scored, start_i, end_i, *, reverse_match=True): + auto new_xysv = [&](size_t start_i, size_t end_i) -> xysv { + const auto& point = BY_SOURCE_MAP[start_i][end_i]; + int caf_i = point.field_id; + bool is_forward = point.possitve; + const auto& caf_f = is_forward ? pafForward[caf_i] : pafBackward[caf_i]; // [19, 9, N] + const auto& caf_b = is_forward ? pafBackward[caf_i] : pafForward[caf_i]; + auto [x, y, v] = std::make_tuple(ann.keypoints[start_i * 3], ann.keypoints[start_i * 3 + 1], ann.keypoints[start_i * 3 + 2]); + float xy_scale_s = std::max(0.f, ann.jointScales[start_i]); + const auto [nx, ny, ns, nv] = growConnectionBlend(x, y, xy_scale_s, caf_f); + // std::cout << "NEW:\t" << nx << '\t' << ny << '\t' << ns << '\t' << nv << '\n'; + + if (nv == 0) + return std::nullopt; + + float keypoint_score = std::sqrt(nv * v); + if (keypoint_score < keypointThreshold) + return std::nullopt; + // Use relative threshold + constexpr float keypoint_threshold_rel = 0.5; + if (keypoint_score < v * keypoint_threshold_rel) + return std::nullopt; + + float xy_scale_t = std::max(0.f, ns); + // if self.reverse_match and reverse_match -> true + const auto [rx, ry, rs, rv] = growConnectionBlend(nx, ny, xy_scale_t, caf_b); + // std::cout << "REVERSE:\t" << rx << '\t' << ry << '\t' << rs << '\t' << rv << '\n'; + if (rs == 0 || std::abs(x - rx) + std::abs(y - ry) > xy_scale_s) + return std::nullopt; + + return std::make_tuple(nx, ny, ns, keypoint_score); + }(start_i, end_i); + + if (std::nullopt == new_xysv) + continue; + + frontier.emplace(-std::get<3>(new_xysv.value()), new_xysv, start_i, end_i); } - } else { - scores_f = scores; - } - - // mask_f = scores_f > 
score_th - std::vector mask_f; - for (int i = 0; i < (int)scores_b.size(); ++i) { - if (scores_f[i] > score_th) { mask_f.push_back(i); } - } + return std::nullopt; + }; - // scored_forward.append(np.concatenate(( - // np.expand_dims(scores_f[mask_f], 0), - // fourds[0, 1:4][:, mask_f], - // fourds[1, 1:4][:, mask_f], - // ))) - const size_t mask_f_size = mask_f.size(); - std::vector result_f(7 * mask_f_size); - for (size_t i = 0; i < mask_f_size; ++i) { - const auto m = mask_f[i]; - result_f[i ] = scores_f[m]; - result_f[i + mask_f_size ] = masked[scores_size + m]; - result_f[i + mask_f_size*2] = masked[scores_size*2 + m]; - result_f[i + mask_f_size*3] = masked[scores_size*3 + m]; - result_f[i + mask_f_size*4] = masked[scores_size*5 + m]; - result_f[i + mask_f_size*5] = masked[scores_size*6 + m]; - result_f[i + mask_f_size*6] = masked[scores_size*7 + m]; + for (size_t joint_i = 0; joint_i < N_PIFPAF_KEYPOINTS; ++joint_i) { + if (ann.keypoints[3 * joint_i + 2] != 0.0) { + // std::cout << "-----joint_i " << joint_i << "\n"; + add_to_frontier(joint_i); + } } - scored_forward.push_back(result_f); - - /* - // For debugging - printf("iteration: %d\n", c); - printf("scores:\n"); for (auto n : scores) printf("%f, ", n); printf("\n"); - printf("mask:\n"); for (auto n : mask) printf("%d, ", n); printf("\n"); - printf("masked:\n"); for (auto n : masked) printf("%f, ", n); printf("\n"); - printf("scores_b:\n"); for (auto n : scores_b) printf("%f, ", n); printf("\n"); - printf("scores_f:\n"); for (auto n : scores_f) printf("%f, ", n); printf("\n"); - */ - } - return Paf_target{ scored_forward, scored_backward }; -} -std::vector -OpenPifPafPostprocessor::pifhrSeeds(const std::vector& pifhrScales, - const std::vector& pifhrCore) -{ - std::vector seeds; - - for (int field_i = 0; field_i < 17; ++field_i) { - const size_t pifhrScalesOffset = field_i * pifhr_stride_0; - const size_t pifhrCoreOffset = field_i * pifhr_stride_0; - - // candidates = np.concatenate((index_fields, 
np.expand_dims(f, 0)), 0) - // mask = f > self.seed_threshold - std::vector mask; - for (int i = 0; i < H_hr * W_hr; ++i) { - const auto value = pifhrCore[pifhrCoreOffset + i]; - if (value > seedThreshold) { mask.push_back(i); } - } + while (true) { + auto entry = frontier_get(); + if (!entry.has_value()) + break; - // candidates = np.moveaxis(candidates[:, mask], 0, -1) - // This is a (count, 3) tensor where count is #elements over threshold. - std::vector masked(mask.size() * 3); - for (size_t i = 0; i < mask.size(); ++i) { - const auto m = mask[i]; - masked[i*3 ] = indexField_hr[m]; - masked[i*3 + 1] = indexField_hr[m + H_hr*W_hr]; - masked[i*3 + 2] = pifhrCore[pifhrCoreOffset + m]; + auto [_, new_xysv, jsi, jti] = entry.value().data; + + // std::cout << "jsi = " << jsi << ", jti = " << jti << ", ann.data[jti, 2] = " << ann.keypoints[jti * 3 + 2] << '\n'; + if (ann.keypoints[jti * 3 + 2] > 0.0) + continue; + + auto [nx, ny, ns, nv] = new_xysv.value(); + ann.keypoints[jti * 3 + 0] = nx; + ann.keypoints[jti * 3 + 1] = ny; + ann.keypoints[jti * 3 + 2] = nv; + ann.jointScales[jti] = ns; + add_to_frontier(jti); + } + } + + std::vector OpenPifPafPostprocessor::softNMS(std::vector& annotations) + { + float maxx = 0.0f; + float maxy = 0.0f; + for (auto& ann : annotations) { + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + auto x = ann.keypoints[k * 3]; + auto y = ann.keypoints[k * 3 + 1]; + if (x > maxx) { + maxx = x; + } + if (y > maxy) { + maxy = y; + } + } } - // occupied = np.zeros(s.shape) - std::vector occupied(H_hr * W_hr, 0.0f); + const auto h = (int)(maxy + 1); + const auto w = (int)(maxx + 1); + Occupancy occupied(17, h, w); - std::vector sorted(mask.size()); + std::vector sorted(annotations.size()); std::iota(sorted.begin(), sorted.end(), 0); - std::sort(sorted.begin(), sorted.end(), [masked] (int const& a, int const& b) { - return masked[a*3 + 2] > masked[b*3 + 2]; + std::sort(sorted.begin(), sorted.end(), [annotations](int const& a, int const& b) { + 
return annotations[a].score() > annotations[b].score(); }); - // for c in sorted(candidates, key=lambda c: c[2], reverse=True): - for (auto c : sorted) { - const auto c_0 = masked[c*3]; - const auto c_1 = masked[c*3 + 1]; - const auto c_2 = masked[c*3 + 2]; - - // i, j = int(c[0]), int(c[1]) - const auto i = (int)c_0; - const auto j = (int)c_1; - if (occupied[j*W_hr + i] > 0) { continue; } - - // width = max(4, s[j, i]) - const auto s = pifhrScales[pifhrScalesOffset + j * pifhr_stride_1 + i]; - const auto width = std::max(4.0f, s); - - // scalar_square_add_single(occupied, c[0], c[1], width / 2.0, 1.0) - scalarSquareAddSingle(occupied.data(), H_hr, W_hr, c_0, c_1, width / 2.0f, 1.0f); - - // seeds.append((c[2], field_i, c[0] / self.stride, c[1] / self.stride)) - seeds.emplace_back( c_2, field_i, c_0 / stride, c_1 / stride ); - } - } - - // seeds = list(sorted(seeds, reverse=True)) - std::sort(seeds.begin(), seeds.end(), [] (const Pifhr_seed& a, const Pifhr_seed& b) { - const auto ca = std::get<0>(a); - const auto cb = std::get<0>(b); - return ca > cb; - }); - - // if len(seeds) > 500: - // if seeds[500][0] > 0.1: - // seeds = [s for s in seeds if s[0] > 0.1] - // else: - // seeds = seeds[:500] - if (seeds.size() > 500) { - seeds.resize(500); - } - return seeds; -} - -std::vector -OpenPifPafPostprocessor::pafCenter(const std::vector& paf_field, - float x, float y, float sigma) -{ - std::vector mask; - const int paf_stride = (int)paf_field.size() / 7; - for (int i = 0; i < paf_stride; ++i) { - const bool take = (paf_field[ paf_stride + i] > x - sigma * paf_field[3*paf_stride + i]) && - (paf_field[ paf_stride + i] < x + sigma * paf_field[3*paf_stride + i]) && - (paf_field[2*paf_stride + i] > y - sigma * paf_field[3*paf_stride + i]) && - (paf_field[2*paf_stride + i] < y + sigma * paf_field[3*paf_stride + i]); - if (take) { mask.push_back(i); } - } - if (mask.empty()) { return {}; } - - const int mask_size = (int)mask.size(); - const int out_stride = mask_size; - 
std::vector result(7 * mask_size, 0.0f); - for (int j = 0; j < 7; ++j) { - for (int i = 0; i < mask_size; ++i) { - const int m = mask[i]; - result[j*out_stride + i] = paf_field[j*paf_stride + m]; - } - } - return result; -} - -OpenPifPafPostprocessor::Connection -OpenPifPafPostprocessor::growConnection(float x, float y, - const std::vector& paf_field_) -{ - // # source value - // paf_field = paf_center(paf_field, xy[0], xy[1], sigma=2.0) - // if paf_field.shape[1] == 0: - // return 0, 0, 0 - const auto paf_field = pafCenter(paf_field_, x, y, 2.0f); - if (paf_field.empty()) { return Connection{ 0, 0, 0}; } - - // # source distance - // d = np.linalg.norm(np.expand_dims(xy, 1) - paf_field[1:3], axis=0) - // b_source = paf_field[3] * 3.0 - // # combined value and source distance - // v = paf_field[0] - // scores = np.exp(-1.0 * d / b_source) * v # two-tailed cumulative Laplace - const int paf_stride = (int)paf_field.size() / 7; - std::vector scores(paf_stride); - for (int i = 0; i < paf_stride; ++i) { - const auto a = x - paf_field[paf_stride + i]; - const auto b = y - paf_field[paf_stride*2 + i]; - const auto d = std::sqrt(a*a + b*b); - const auto b_source = paf_field[paf_stride*3 + i] * 3.0f; - const auto v = paf_field[i]; - scores[i] = std::exp(-d / b_source) * v; - } - - // return self._target_with_maxscore(paf_field[4:7], scores) - int max_i; - const float score = vargmax(scores.data(), scores.size(), &max_i); - return Connection{ paf_field[paf_stride*4 + max_i], paf_field[paf_stride*5 + max_i], score }; -} - -std::vector OpenPifPafPostprocessor::frontier(Annotation& ann) { - std::vector f; - - for (int connection_i = 0; connection_i < numBones; ++connection_i) { - const auto bone = bones[connection_i]; - const auto j1i = bone[0] - 1; - const auto j2i = bone[1] - 1; - if (ann.keypoints[j1i*3 + 2] > 0.0f && ann.keypoints[j2i*3 + 2] == 0.0f) { - f.emplace_back( ann.keypoints[j1i*3 + 2], connection_i, true, j1i, j2i ); - } - } - - for (int connection_i = 0; 
connection_i < numBones; ++connection_i) { - const auto bone = bones[connection_i]; - const auto j1i = bone[0] - 1; - const auto j2i = bone[1] - 1; - if (ann.keypoints[j2i*3 + 2] > 0.0f && ann.keypoints[j1i*3 + 2] == 0.0f) { - f.emplace_back( ann.keypoints[j2i*3 + 2], connection_i, false, j1i, j2i ); - } - } - - std::sort(f.begin(), f.end(), [] (const frontier_t& a, const frontier_t& b) { - const auto ca = std::get<0>(a); - const auto cb = std::get<0>(b); - return ca > cb; - }); - - return f; -} - -OpenPifPafPostprocessor::frontier_t OpenPifPafPostprocessor::frontierIter(Annotation& ann) { - while (frontierActive) { - // unblocked_frontier = [f for f in self.frontier() - // if (f[1], f[2]) not in block_frontier] - std::vector unblockedFrontier; - for (auto f : frontier(ann)) { - const auto connection_id = std::get<1>(f); - const auto forward = std::get<2>(f); - if (blockFrontier.find(std::tuple{ connection_id, forward }) == blockFrontier.end()) { - unblockedFrontier.push_back(f); + for (auto a : sorted) { + Annotation& ann = annotations[a]; + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + const auto x = ann.keypoints[k * 3]; + const auto y = ann.keypoints[k * 3 + 1]; + const auto v = ann.keypoints[k * 3 + 2]; + if (v == 0) { + continue; + } + + const auto i = std::min(std::max(0, (int)std::round(x)), w - 1); + const auto j = std::min(std::max(0, (int)std::round(y)), h - 1); + + if (occupied.fuzz_get(k, j, i)) { + ann.keypoints[k * 3 + 2] = 0.0f; + } else { + scalarSquareAddSingle(occupied, k, h, w, x, y, ann.jointScales[k]); + } } } - /* - // For debugging - printf("unblockedFrontier "); - for (auto n : unblockedFrontier) { - printf("(%f, %d, %s, %d, %d), ", std::get<0>(n), std::get<1>(n), - std::get<2>(n) ? 
"true" : "false", - std::get<3>(n), std::get<4>(n)); - } - printf("\n"); - */ - - // if not unblocked_frontier: - // break - if (unblockedFrontier.empty()) { - frontierActive = false; - break; - } - - // first = unblocked_frontier[0] - // yield first - // block_frontier.add((first[1], first[2])) - const auto first = unblockedFrontier[0]; - const auto connection_id = std::get<1>(first); - const auto forward = std::get<2>(first); - blockFrontier.insert(std::tuple{ connection_id, forward }); - return first; - } - return {}; -} - -void OpenPifPafPostprocessor::grow(Annotation& ann, - const std::vector>& pafForward, - const std::vector>& pafBackward, - float th) -{ - frontierActive = true; - blockFrontier.clear(); - - while (true) { - const auto f = frontierIter(ann); - if (!frontierActive) { return; } - - const auto i = std::get<1>(f); - const auto forward = std::get<2>(f); - const auto j1i = std::get<3>(f); - const auto j2i = std::get<4>(f); - - // For debugging - //printf("grow: %d %s %d %d\n", i, forward ? 
"true" : "false", j1i, j2i); - - float x, y, v; - std::vector directed_paf_field; - std::vector directed_paf_field_reverse; - if (forward) { - x = ann.keypoints[j1i*3 ]; - y = ann.keypoints[j1i*3 + 1]; - v = ann.keypoints[j1i*3 + 2]; - directed_paf_field = pafForward[i]; - directed_paf_field_reverse = pafBackward[i]; - } else { - x = ann.keypoints[j2i*3 ]; - y = ann.keypoints[j2i*3 + 1]; - v = ann.keypoints[j2i*3 + 2]; - directed_paf_field = pafBackward[i]; - directed_paf_field_reverse = pafForward[i]; - } - - const auto t = growConnection(x, y, directed_paf_field); - const auto new_x = std::get<0>(t); - const auto new_y = std::get<1>(t); - auto new_v = std::get<2>(t); - - if (new_v < th) { continue; } - - // reverse match - if (th >= 0.1) { - const auto t1 = growConnection(new_x, new_y, directed_paf_field_reverse); - const auto reverse_x = std::get<0>(t1); - const auto reverse_y = std::get<1>(t1); - const auto reverse_v = std::get<2>(t1); - if (reverse_v < th) { continue; } - if (std::abs(x - reverse_x) + std::abs(y - reverse_y) > 1.0f) { continue; } - } - - new_v = std::sqrt(new_v * v); // geometric mean - - if (forward) { - if (new_v > ann.keypoints[j2i*3 + 2]) { - ann.keypoints[j2i*3 ] = new_x; - ann.keypoints[j2i*3 + 1] = new_y; - ann.keypoints[j2i*3 + 2] = new_v; - } - } else { - if (new_v > ann.keypoints[j1i*3 + 2]) { - ann.keypoints[j1i*3 ] = new_x; - ann.keypoints[j1i*3 + 1] = new_y; - ann.keypoints[j1i*3 + 2] = new_v; + std::vector filtered; + for (auto& ann : annotations) { + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + if (ann.keypoints[k * 3 + 2] > 0.0f) { + filtered.push_back(ann); + break; + } } } - } -} - -void OpenPifPafPostprocessor::fillJointScales(Annotation& ann, - const std::vector& scales, - int fieldH, - int fieldW, - float hr_scale) -{ - for (int k = 0; k < numKeypoints; ++k) { - const auto x = ann.keypoints[k*3]; - const auto y = ann.keypoints[k*3 + 1]; - const auto v = ann.keypoints[k*3 + 2]; - if (v == 0) { continue; } - - // i = 
max(0, min(scale_field.shape[1] - 1, int(round(xyv[0] * hr_scale)))) - // j = max(0, min(scale_field.shape[0] - 1, int(round(xyv[1] * hr_scale)))) - const auto i = std::max(0, std::min(fieldW - 1, (int)std::round(x * hr_scale))); - const auto j = std::max(0, std::min(fieldH - 1, (int)std::round(y * hr_scale))); - - // self.joint_scales[xyv_i] = scale_field[j, i] / hr_scale - ann.jointScales[k] = scales[k*pifhr_stride_0 + j*pifhr_stride_1 + i] / hr_scale; - } -} - -std::vector -OpenPifPafPostprocessor::decodeAnnotations(const std::vector& pifhr, - const std::vector& pifhrScales, - const std::vector& pifhrCore, - const std::vector>& pafForward, - const std::vector>& pafBackward) -{ - const auto seeds = pifhrSeeds(pifhrScales, pifhrCore); - - // This is a (17, H_hr, W_hr) tensor. - std::vector occupied(17 * H_hr * W_hr, 0.0f); - - std::vector annotations; - for (auto& seed : seeds) { - const auto v = std::get<0>(seed); - const auto f = std::get<1>(seed); - const auto x = std::get<2>(seed); - const auto y = std::get<3>(seed); - - const auto i = std::min(std::max(0, (int)std::round(x * stride)), W_hr - 1); - const auto j = std::min(std::max(0, (int)std::round(y * stride)), H_hr - 1); - if (occupied[f*H_hr*W_hr + j*W_hr + i] > 0.0f) { continue; } - - Annotation ann(f, x, y, v); - grow(ann, pafForward, pafBackward); - fillJointScales(ann, pifhrScales, H_hr, W_hr, stride); - annotations.push_back(ann); - - for (int i = 0; i < numKeypoints; ++i) { - const auto x = ann.keypoints[i*3]; - const auto y = ann.keypoints[i*3 + 1]; - const auto v = ann.keypoints[i*3 + 2]; - if (v == 0) { continue; } - - const auto width = ann.jointScales[i] * stride; - scalarSquareAddSingle(occupied.data() + i*H_hr*W_hr, H_hr, W_hr, - x * stride, y * stride, width / 2.0f, 1.0f); - } - } - return annotations; -} - -std::vector OpenPifPafPostprocessor::softNMS(std::vector& annotations) { - float maxx = 0.0f; - float maxy = 0.0f; - for (auto& ann : annotations) { - for (int k = 0; k < numKeypoints; 
++k) { - auto x = ann.keypoints[k*3]; - auto y = ann.keypoints[k*3 + 1]; - if (x > maxx) { maxx = x; } - if (y > maxy) { maxy = y; } + return filtered; + + // Note: The original code sorts here on the score (descending), but + // we sort again later on so it's a bit quicker if we skip that here. + } + + void OpenPifPafPostprocessor::initTensors(int tensorWidth, int tensorHeight) + { + H = tensorHeight; + W = tensorWidth; + H_hr = (H - 1) * (int)STRIDE + 1; + W_hr = (W - 1) * (int)STRIDE + 1; + + pif_stride_1 = H * W; + pif_stride_0 = 5 * pif_stride_1; + + pifhr_stride_1 = W_hr; + pifhr_stride_0 = H_hr * pifhr_stride_1; + + const int shape = C * H_hr * W_hr; + targetsCoreOnly = std::vector(shape); + targets = std::vector(shape); + scales = std::vector(shape); + ns = std::vector(shape); + } + + ai_app::Object_detection::Result OpenPifPafPostprocessor::postprocess( + int inputWidth, int inputHeight, + int tensorWidth, int tensorHeight, + const std::vector& pif, + const std::vector& paf) + { + // Allocate the intermediate tensors the first time or when the size changes. + if (W != tensorWidth || H != tensorHeight) { + initTensors(tensorWidth, tensorHeight); + } + + const auto result_tuple = targetIntensities(pif); + const auto& pifhr = std::get<0>(result_tuple); + const auto& pifhrScales = std::get<1>(result_tuple); + const auto& pifhrCore = std::get<2>(result_tuple); + + // (17, 5, H, W) + // pif: [v, x, y, _, s] + const size_t pif_ch = 5, hw_size = H * W; + const size_t pif_shard_size = pif_ch * hw_size; + + // BEGIN: seeds = utils.CifSeeds(cifhr.accumulated).fill(fields, self.cif_metas) + std::vector> seeds{}; + + const float maxx = W_hr - 0.51, maxy = H_hr - 0.51; + for (size_t field_i = 0; field_i < N_PIFPAF_KEYPOINTS; ++field_i) { + // Search qualified entries. 
+ size_t this_field_offset = field_i * pif_shard_size; + for (size_t hw_index = 0; hw_index < hw_size; ++hw_index) { + size_t vindex = hw_index + this_field_offset; + if (pif[vindex] > seedThreshold) { + float c = pif[vindex], x = pif[vindex + hw_size], y = pif[vindex + 2 * hw_size], s = pif[vindex + 4 * hw_size]; + // scalar_values + if (x < -0.49 || y < -0.49 || x > maxx || y > maxy) { + continue; + } + float v = pifhrCore[field_i * W_hr * H_hr + ((size_t)(y * STRIDE + 0.5) * W_hr) + (size_t)(x * STRIDE + 0.5)]; + // scalar_values :: over. + + v = 0.9 * v + 0.1 * c; + // printf("%f %f, %f, %f, %f\n", v, c, x, y, s); + + // pass or not? + if (v > seedThreshold) { + // ok, you pass. -> seeds -> [x, y, v, s] + seeds.emplace_back(v, field_i, x * STRIDE, y * STRIDE, s * STRIDE); + } + } + } } - } - - const auto h = (int)(maxy + 1); - const auto w = (int)(maxx + 1); - std::vector occupied(17 * h * w, 0.0f); - - std::vector sorted(annotations.size()); - std::iota(sorted.begin(), sorted.end(), 0); - std::sort(sorted.begin(), sorted.end(), [annotations] (int const& a, int const& b) { - return annotations[a].score() > annotations[b].score(); - }); - - for (auto a : sorted) { - Annotation& ann = annotations[a]; - for (int k = 0; k < numKeypoints; ++k) { - const auto x = ann.keypoints[k*3 ]; - const auto y = ann.keypoints[k*3 + 1]; - const auto v = ann.keypoints[k*3 + 2]; - if (v == 0) { continue; } - - const auto i = std::min(std::max(0, (int)std::round(x)), w - 1); - const auto j = std::min(std::max(0, (int)std::round(y)), h - 1); - - if (occupied[k*h*w + j*w + i] > 0.0f) { - ann.keypoints[k*3 + 2] = 0.0f; - } else { - scalarSquareAddSingle(occupied.data() + k*h*w, h, w, x, y, ann.jointScales[k], 1.0f); + // std::cout << seeds.size() << "seeds size\n"; + // END: seeds = utils.CifSeeds(cifhr.accumulated).fill(fields, self.cif_metas) + + // BEGIN: caf_scored = utils.CafScored(cifhr.accumulated).fill(fields, self.caf_metas) + // (19, 9, DYNAMICs) + constexpr size_t paf_ch = 
9; + const size_t paf_shard_size = paf_ch * hw_size; + // (19, 9, H, W)... + FBContainer forward{}, backward{}; + for (size_t field_i = 0; field_i < forward.size(); ++field_i) { + constexpr float PAF_SCORE_THRE = 0.2; + constexpr float CIF_FLOOR = 0.1; + // filter! + for (size_t hw_idx = 0; hw_idx < hw_size; ++hw_idx) { + const size_t paf_conf_idx = hw_idx + field_i * paf_shard_size; + const auto conf = paf[paf_conf_idx]; + if (conf > PAF_SCORE_THRE) { + // values in this line... + std::array this_ch{}; + for (size_t chidx = 0; chidx < this_ch.size(); ++chidx) { + this_ch[chidx] = paf[paf_conf_idx + chidx * hw_size]; + if (chidx != 0) + this_ch[chidx] *= STRIDE; + } + + auto backward_pif_ch = bones[field_i][0] - 1; + auto forward_pif_ch = bones[field_i][1] - 1; + // backward pass. + constexpr std::array BACKWARD_IDX{ 0, 3, 4, 1, 2, 6, 5, 8, 7 }; + constexpr std::array FORWARD_IDX{ 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + + // restore... (yet another filtering...) + // cifhr_t = scalar_values(self.cifhr[joint_t], nine[3], nine[4], default=0.0) + // nine[0] = nine[0] * (self.cif_floor + (1.0 - self.cif_floor) * cifhr_t) + const auto pass = [&this_ch, maxx, maxy, this, field_i, &pifhrCore](const auto& idx_mapping, FBContainer& cont, size_t pif_field_idx) { + float x = this_ch[idx_mapping[3]], y = this_ch[idx_mapping[4]]; + if (!(x < -0.49 || y < -0.49 || x > maxx || y > maxy)) { + // std::cout << field_i << "\tXY = \t"<< x << '\t' << y << '\t' << (size_t)(x + 0.5) << '\t' << (size_t)(y + 0.5) << "\t MAX HW: " << W_hr << ' ' << H_hr << std::endl; + float cifhr_t = pifhrCore[pif_field_idx * W_hr * H_hr + ((size_t)(y + 0.5) * W_hr) + (size_t)(x + 0.5)]; + float new_v = this_ch[0] * (CIF_FLOOR + (1 - CIF_FLOOR) * cifhr_t); + if (new_v > PAF_SCORE_THRE) { + // forward pass. + for (size_t fwd_idx = 0; fwd_idx < cont.front().size(); ++fwd_idx) { + // restore! 
+ cont[field_i][fwd_idx].push_back(this_ch[idx_mapping[fwd_idx]]); + } + cont[field_i][0].back() = new_v; + } + } + }; + + pass(BACKWARD_IDX, backward, backward_pif_ch); + pass(FORWARD_IDX, forward, forward_pif_ch); + } } } - } + // for (const auto& f : forward) { + // std::cout << "(" << f.size() << ", " << f.front().size() << "), "; + // } + // std::cout << '\n'; + // for (const auto& f : backward) { + // std::cout << "(" << f.size() << ", " << f.front().size() << "), "; + // } + // std::cout << '\n'; + // END: caf_scored = utils.CafScored(cifhr.accumulated).fill(fields, self.caf_metas) + std::sort(seeds.begin(), seeds.end(), std::greater{}); + + // occupancy map. + // std::cout << C << ' ' << H_hr << ' ' << W_hr << '\n'; + Occupancy occupied(C, H_hr, W_hr); + std::vector annotations; + for (const auto& [v, f, x, y, s] : seeds) { + if (occupied.fuzz_get(f, y, x)) { + continue; + } - std::vector filtered; - for (auto& ann : annotations) { - for (int k = 0; k < numKeypoints; ++k) { - if (ann.keypoints[k*3 + 2] > 0.0f) { - filtered.push_back(ann); - break; + Annotation ann(f, x, y, v); + ann.jointScales[f] = s; + grow(ann, forward, backward); + annotations.push_back(ann); + + for (int i = 0; i < N_PIFPAF_KEYPOINTS; ++i) { + const auto ax = ann.keypoints[i * 3]; + const auto ay = ann.keypoints[i * 3 + 1]; + const auto av = ann.keypoints[i * 3 + 2]; + if (av == 0) { + continue; + } + + const auto width = ann.jointScales[i]; + scalarSquareAddSingle(occupied, i, H_hr, W_hr, ax, ay, width, Occupancy::reduction, Occupancy::min_scale_reduced); // width is sigma... } } - } - return filtered; - - // Note: The original code sorts here on the score (descending), but - // we sort again later on so it's a bit quicker if we skip that here. 
-} -void OpenPifPafPostprocessor::initTensors(int tensorWidth, int tensorHeight) { - H = tensorHeight; - W = tensorWidth; - H_hr = H * (int)stride; - W_hr = W * (int)stride; + // This returns two lists that each contain 19 tensors of shape (7, ?) + // where the second dimension can vary in size (depends on thresholds). + // const auto pt = scorePafTarget(paf, pifhr); + // const auto pafForward = std::get<0>(pt); + // const auto pafBackward = std::get<1>(pt); - paf_stride_2 = H * W; - paf_stride_1 = 4 * paf_stride_2; - paf_stride_0 = 2 * paf_stride_1; - - pif_stride_1 = H * W; - pif_stride_0 = 4 * pif_stride_1; - - pifhr_stride_1 = W_hr; - pifhr_stride_0 = H_hr * pifhr_stride_1; - - indexField = makeIndexField(H, W); - indexField_hr = makeIndexField(H_hr, W_hr); - paf = std::vector(19 * 2 * 4 * H * W); - pif = std::vector(17 * 4 * H * W); - - const int shape = C * H_hr * W_hr; - targetsCoreOnly = std::vector(shape); - targets = std::vector(shape); - scales = std::vector(shape); - ns = std::vector(shape); -} - -ai_app::Object_detection::Result OpenPifPafPostprocessor::postprocess_0_8( - int inputWidth, int inputHeight, - int tensorWidth, int tensorHeight, - const float* pif_c, - const float* pif_r, - const float* pif_s, - const float* paf_c, - const float* paf_r1, - const float* paf_r2, - const float* paf_b1, - const float* paf_b2) -{ - this->inputWidth = inputWidth; - this->inputHeight = inputHeight; - - // Allocate the intermediate tensors the first time or when the size changes. 
- if (W != tensorWidth || H != tensorHeight) { - initTensors(tensorWidth, tensorHeight); - } - - normalizePAF(paf_c, paf_r1, paf_r2, paf_b1, paf_b2); - normalizePIF(pif_c, pif_r, pif_s); - - const auto ti = targetIntensities(pif); - const auto pifhr = std::get<0>(ti); - const auto pifhrScales = std::get<1>(ti); - const auto pifhrCore = std::get<2>(ti); - - /* - // For debugging - for (int c = 0; c < 17; ++c) { - for (int y = 0; y < H_hr; ++y) { - for (int x = 0; x < W_hr; ++x) { - printf("%f, ", pifhrCore[c*136*248 + y*248 + x]); + /* + // For debugging + printf("pafForward:\n"); + for (auto& i : pafForward) { + for (auto j : i) { printf("%f, ", j); } printf("\n"); + } + printf("\npafBackward:\n"); + for (auto i : pafBackward) { + for (auto& j : i) { printf("%f, ", j); } printf("\n"); + } + */ + + // auto annotations = decodeAnnotations(seeds, pifhr, pifhrScales, pifhrCore, pafForward, pafBackward); + + // Scale to input size + // for (auto& ann : annotations) { + // for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + // ann.keypoints[k*3 ] *= STRIDE; + // ann.keypoints[k*3 + 1] *= STRIDE; + // std::cout << "--> Scaled: " < thresholded; + for (auto& ann : annotations) { + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + if (ann.keypoints[k * 3 + 2] < keypointThreshold) { + ann.keypoints[k * 3 + 2] = 0.0f; + } + } + if (ann.score() >= instanceThreshold) { + thresholded.push_back(ann); + } } - } - printf("\n"); - } - */ - - // This returns two lists that each contain 19 tensors of shape (7, ?) - // where the second dimension can vary in size (depends on thresholds). 
- const auto pt = scorePafTarget(paf, pifhr); - const auto pafForward = std::get<0>(pt); - const auto pafBackward = std::get<1>(pt); - /* - // For debugging - printf("pafForward:\n"); - for (auto& i : pafForward) { - for (auto j : i) { printf("%f, ", j); } printf("\n"); - } - printf("\npafBackward:\n"); - for (auto i : pafBackward) { - for (auto& j : i) { printf("%f, ", j); } printf("\n"); - } - */ + std::sort(thresholded.begin(), thresholded.end(), [](const Annotation& a, const Annotation& b) { + return a.score() > b.score(); + }); - auto annotations = decodeAnnotations(pifhr, pifhrScales, pifhrCore, pafForward, pafBackward); + // // Convert to normalized coordinates + // for (auto& ann : thresholded) { + // for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + // ann.keypoints[k*3 ] /= inputWidth; + // ann.keypoints[k*3 + 1] /= inputHeight; + // } + // } - // Scale to input size - const float output_stride = 8.0f; - for (auto& ann : annotations) { - for (int k = 0; k < numKeypoints; ++k) { - ann.keypoints[k*3 ] *= output_stride; - ann.keypoints[k*3 + 1] *= output_stride; - ann.jointScales[k] *= output_stride; - } + /* + // For debugging + for (auto ann : thresholded) { + printf("Keypoints:\n"); + for (auto k : ann.keypoints) { + printf("%f, ", k); } - - // Non-maximum suppression - if (!annotations.empty()) { - annotations = softNMS(annotations); + printf("\nJoint scales:\n"); + for (auto k : ann.jointScales) { + printf("%f, ", k); } - - // Threshold - std::vector thresholded; - for (auto& ann : annotations) { - for (int k = 0; k < numKeypoints; ++k) { - if (ann.keypoints[k*3 + 2] < keypointThreshold) { - ann.keypoints[k*3 + 2] = 0.0f; + printf("\n"); + } + */ + + ai_app::Object_detection::Result result; + result.success = true; + for (auto& ann : thresholded) { + ai_app::Landmarks landmarks; + landmarks.type = "body_pose_pifpaf"; + + int minx = std::numeric_limits::max(), + miny = std::numeric_limits::max(), + maxx_ = std::numeric_limits::min(), + maxy_ = 
std::numeric_limits::min(); + + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + const int x = ann.keypoints[k * 3]; + const int y = ann.keypoints[k * 3 + 1]; + const auto v = ann.keypoints[k * 3 + 2]; + + if (v > 0.0f) { + if (x < minx) { + minx = x; + } + if (x > maxx_) { + maxx_ = x; + } + if (y < miny) { + miny = y; + } + if (y > maxy_) { + maxy_ = y; + } + } + + ai_app::Landmark landmark{}; + landmark.confidence = v; + landmark.position.x = x; + landmark.position.y = y; + landmarks.points.push_back(landmark); } - } - if (ann.score() >= instanceThreshold) { - thresholded.push_back(ann); - } - } - std::sort(thresholded.begin(), thresholded.end(), [] (const Annotation& a, const Annotation& b) { - return a.score() > b.score(); - }); + ai_app::Object_detection::Result::Item item; + item.confidence = ann.score(); + item.class_index = 1; + item.bounding_box.origin.x = minx; + item.bounding_box.origin.y = miny; + item.bounding_box.size.x = maxx_ - minx; + item.bounding_box.size.y = maxy_ - miny; + item.landmarks = landmarks; - // Convert to normalized coordinates - for (auto& ann : thresholded) { - for (int k = 0; k < numKeypoints; ++k) { - ann.keypoints[k*3 ] /= inputWidth; - ann.keypoints[k*3 + 1] /= inputHeight; + result.items.push_back(item); } + return result; } - /* - // For debugging - for (auto ann : thresholded) { - printf("Keypoints:\n"); - for (auto k : ann.keypoints) { - printf("%f, ", k); - } - printf("\nJoint scales:\n"); - for (auto k : ann.jointScales) { - printf("%f, ", k); - } - printf("\n"); - } - */ - - ai_app::Object_detection::Result result; - result.success = true; - for (auto& ann : thresholded) { - ai_app::Landmarks landmarks; - landmarks.type = "body_pose_pifpaf"; - - int minx = std::numeric_limits::max(), - miny = std::numeric_limits::max(), - maxx = -std::numeric_limits::max(), - maxy = -std::numeric_limits::max(); - - for (int k = 0; k < numKeypoints; ++k) { - const int x = ann.keypoints[k*3 ] * 10000; // FIXME: MAGIC NUMBER. 
- const int y = ann.keypoints[k*3 + 1] * 10000; - const auto v = ann.keypoints[k*3 + 2]; - - if (v > 0.0f) { - if (x < minx) { minx = x; } - if (x > maxx) { maxx = x; } - if (y < miny) { miny = y; } - if (y > maxy) { maxy = y; } - } - - ai_app::Landmark landmark; - landmark.confidence = v; - landmark.position.x = x; - landmark.position.y = y; - landmarks.points.push_back(landmark); - } - - ai_app::Object_detection::Result::Item item; - item.confidence = ann.score(); - item.class_index = 1; - item.bounding_box.origin.x = minx; - item.bounding_box.origin.y = miny; - item.bounding_box.size.x = maxx - minx; - item.bounding_box.size.y = maxy - miny; - item.landmarks = landmarks; - - result.items.push_back(item); - } - return result; } - -} \ No newline at end of file +} diff --git a/src/pifpaf_decoder/openpifpaf_postprocessor.hpp b/src/pifpaf_decoder/openpifpaf_postprocessor.hpp index 9124b1f1..6aa28353 100644 --- a/src/pifpaf_decoder/openpifpaf_postprocessor.hpp +++ b/src/pifpaf_decoder/openpifpaf_postprocessor.hpp @@ -1,15 +1,18 @@ #pragma once -#include +#include +#include #include #include -#include #include "object_detection.hpp" -namespace lpdnn::aiapp_impl { +namespace lpdnn { +namespace aiapp_impl { -/** + using FBContainer = std::array, 9>, 19>; + + /** Post-processing logic for OpenPifPaf \note This object caches the big tensors to save on memory allocations. @@ -21,168 +24,101 @@ namespace lpdnn::aiapp_impl { the same time. If you must use multiple threads, give each thread its own instance of this class. */ -class OpenPifPafPostprocessor -{ -public: - OpenPifPafPostprocessor() : H(0), W(0) { } - - /** - Applies post-processing to OpenPifPaf output. - - \param inpWidth Width of the input tensor in pixels. - \param inpHeight Height of the input tensor in pixels. - \param tensorWidth Width of the neural network's PIF and PAF outputs. - \param tensorHeight Height of the neural network's PIF and PAF outputs. 
- */ - ai_app::Object_detection::Result postprocess_0_8( - int inpWidth, int inpHeight, int tensorWidth, int tensorHeight, - const float* pif_c, // 17xHxW - const float* pif_r, // 34xHxW - const float* pif_s, // 17xHxW - const float* paf_c, // 19xHxW - const float* paf_r1, // 38xHxW - const float* paf_r2, // 38xHxW - const float* paf_b1, // 19xHxW - const float* paf_b2 // 19xHxW - ); - -public: - static const int numKeypoints = 17; - static const int numBones = 19; - - // Connections between the different keypoint indices. - // Note: these start at 1, not 0! - static const int bones[19][2]; - -private: - struct Annotation { - // Array of `numKeypoints * 3` elements: - // - element `i*3 + 0` is x-coordinate (normalized) - // - element `i*3 + 1` is y-coordinate (normalized) - // - element `i*3 + 2` is confidence score - std::vector keypoints; - - std::vector jointScales; - - Annotation(int j, float x, float y, float v) : keypoints(numKeypoints * 3), - jointScales(numKeypoints) - { - keypoints[j*3 ] = x; - keypoints[j*3 + 1] = y; - keypoints[j*3 + 2] = v; - } - - /** + class OpenPifPafPostprocessor { + public: + OpenPifPafPostprocessor() + : H(0) + , W(0) + { + } + + public: + static constexpr int N_PIFPAF_KEYPOINTS = 17; + static constexpr int N_PIFPAF_BONES = 19; + + // Connections between the different keypoint indices. + // Note: these start at 1, not 0! 
+ static const int bones[19][2]; + float keypointThreshold; + + ai_app::Object_detection::Result postprocess( + int inputWidth, int inputHeight, + int tensorWidth, int tensorHeight, + const std::vector& pif, + const std::vector& paf); + + private: + struct Annotation { + // Array of `N_PIFPAF_KEYPOINTS * 3` elements: + // - element `i*3 + 0` is x-coordinate (normalized) + // - element `i*3 + 1` is y-coordinate (normalized) + // - element `i*3 + 2` is confidence score + std::vector keypoints; + + std::vector jointScales; + + Annotation(int j, float x, float y, float v) + : keypoints(N_PIFPAF_KEYPOINTS * 3) + , jointScales(N_PIFPAF_KEYPOINTS) + { + keypoints[j * 3] = x; + keypoints[j * 3 + 1] = y; + keypoints[j * 3 + 2] = v; + } + + /** Overall confidence score for the entire skeleton. */ - [[nodiscard]] float score() const { - float maxv = 0.0f; - float vv = 0.0f; - for (int k = 0; k < numKeypoints; ++k) { - auto v = keypoints[k*3 + 2]; - maxv = std::max(maxv, v); - vv += v * v; - } - return 0.1f * maxv + 0.9f * vv / (float)numKeypoints; - } - }; - - // 0: confidence of origin - // 1: connection index - // 2: forward? 
- // 3: joint index 1 (not corrected for forward) - // 4: joint index 2 (not corrected for forward) - typedef std::tuple frontier_t; - typedef std::tuple, std::vector, std::vector> Target_intensity; - typedef std::tuple>, std::vector>> Paf_target; - typedef std::tuple Pifhr_seed; - typedef std::tuple Connection; - -private: - void initTensors(int tensorWidth, int tensorHeight); - - void normalizePAF(const float* intensityFields, - const float* j1Fields, - const float* j2Fields, - const float* j1FieldsLogb, - const float* j2FieldsLogb); - - void normalizePIF(const float* jointIntensityFields, - const float* jointFields, - const float* scaleFields); - - Target_intensity - targetIntensities(const std::vector& pif, - float v_th = 0.1f, - bool coreOnly = false); - - Paf_target - scorePafTarget(const std::vector& pafvec, - const std::vector& pifhr, - float pifhr_floor = 0.01f, - float score_th = 0.1f) const; - - std::vector - pifhrSeeds(const std::vector& pifhrScales, - const std::vector& pifhrCore); - - static std::vector pafCenter(const std::vector& paf_field, - float x, float y, float sigma = 1.0f); - - static Connection - growConnection(float x, float y, const std::vector& paf_field_); - - static std::vector frontier(Annotation& ann); - - frontier_t frontierIter(Annotation& ann); - - void grow(Annotation& ann, - const std::vector>& pafForward, - const std::vector>& pafBackward, - float th = 0.1f); - - void fillJointScales(Annotation& ann, - const std::vector& scales, - int fieldH, - int fieldW, - float hr_scale); - - std::vector - decodeAnnotations(const std::vector& pifhr, - const std::vector& pifhrScales, - const std::vector& pifhrCore, - const std::vector>& pafForward, - const std::vector>& pafBackward); - - std::vector softNMS(std::vector& annotations); - -private: - // Used to normalize the skeleton keypoint coordinates to [0, 1]. - float inputWidth, inputHeight; - - // Tensor dimensions (hr = high-resolution). 
- int H, W, H_hr, W_hr; - - // Strides for tensor dimensions. - size_t paf_stride_2, paf_stride_1, paf_stride_0; - size_t pif_stride_1, pif_stride_0; - size_t pifhr_stride_1, pifhr_stride_0; - - // Temporary tensors. - std::vector indexField; // 2 x H x W - std::vector indexField_hr; // 2 x H x W - std::vector paf; // 19 x 2 x 4 x H x W - std::vector pif; // 17 x 4 x H x W - - // Filled in by targetIntensities(). - std::vector targetsCoreOnly; - std::vector targets; - std::vector scales; - std::vector ns; - - std::set> blockFrontier; - bool frontierActive; -}; + float score() const + { + float maxv = 0.0f; + float vv = 0.0f; + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + auto v = keypoints[k * 3 + 2]; + if (v > maxv) { + maxv = v; + } + vv += v * v; + } + return 0.1f * maxv + 0.9f * vv / (float)N_PIFPAF_KEYPOINTS; + } + }; + + typedef std::tuple, std::vector, std::vector> Target_intensity; + + private: + void initTensors(int tensorWidth, int tensorHeight); + + Target_intensity + targetIntensities(const std::vector& pif, + float v_th = 0.1f, + bool coreOnly = false); + + std::tuple + growConnectionBlend(float x, float y, float s, const std::array, 9>& paf_field_); + + // frontier_t frontierIter(Annotation& ann); + + void grow(Annotation& ann, + const FBContainer& pafForward, + const FBContainer& pafBackward); + + std::vector softNMS(std::vector& annotations); + + private: + // Tensor dimensions (hr = high-resolution). + int H, W, H_hr, W_hr; + + // Strides for tensor dimensions. + size_t pif_stride_1, pif_stride_0; + size_t pifhr_stride_1, pifhr_stride_0; + + // Filled in by targetIntensities(). 
+ std::vector targetsCoreOnly; + std::vector targets; + std::vector scales; + std::vector ns; + }; } - +} From e8f5b1daeb91bb6551ca663c296b7fcf264dfa03 Mon Sep 17 00:00:00 2001 From: ganler Date: Sat, 26 Jun 2021 12:08:17 +0800 Subject: [PATCH 4/4] fix: ci --- cmake/hyperpose.fake.cmake | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmake/hyperpose.fake.cmake b/cmake/hyperpose.fake.cmake index 07ae7c1c..7aa1ce0c 100644 --- a/cmake/hyperpose.fake.cmake +++ b/cmake/hyperpose.fake.cmake @@ -12,7 +12,11 @@ ADD_LIBRARY( src/stream.cpp src/thread_pool.cpp src/pose_proposal.cpp - src/human.cpp) + src/human.cpp + src/pifpaf.cpp + src/pifpaf_decoder/math_helpers.cpp + src/pifpaf_decoder/openpifpaf_postprocessor.cpp +) TARGET_LINK_LIBRARIES( ${POSE_LIB_NAME}