diff --git a/README.md b/README.md
index 2db67570..7806a2d4 100644
--- a/README.md
+++ b/README.md
@@ -80,6 +80,7 @@ We compare the prediction performance of HyperPose with [OpenPose 1.6](https://g
 | OpenPose (TinyVGG)    | 34.7 MB | 384 x 256 | **124.925 FPS** | N/A               |
 | OpenPose (MobileNet)  | 17.9 MB | 432 x 368 | **84.32 FPS**   | 8.5 FPS (TF-Pose) |
 | OpenPose (ResNet18)   | 45.0 MB | 432 x 368 | **62.52 FPS**   | N/A               |
+| OpenPifPaf (ResNet50) | 97.6 MB | 97 x 129  | **178.6 FPS**   | 35.3 FPS          |
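The FPS figures above come from the timing code this patch adds to examples/cli.cpp (see the hunks further down). A minimal self-contained sketch of that measurement follows; the `<double, std::milli>` duration type is an assumption, consistent with the `ms` label and the `1000. * frames / time` formula used in the patch:

```cpp
#include <chrono>
#include <iostream>

int main()
{
    using clk_t = std::chrono::high_resolution_clock;
    const auto beg = clk_t::now();

    std::size_t frames = 0;
    // ... decode a frame, run inference, post-process, ++frames ...

    // Elapsed wall-clock time in milliseconds since `beg`.
    const double elapsed_ms =
        std::chrono::duration<double, std::milli>(clk_t::now() - beg).count();
    std::cout << frames << " images got processed in " << elapsed_ms
              << " ms, FPS = " << 1000. * frames / elapsed_ms << '\n';
}
```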

diff --git a/cmake/hyperpose.cmake b/cmake/hyperpose.cmake index 6ebb96b5..28131965 100644 --- a/cmake/hyperpose.cmake +++ b/cmake/hyperpose.cmake @@ -5,10 +5,15 @@ set(POSE_LIB_NAME hyperpose) INCLUDE(cmake/cuda.cmake) FIND_PACKAGE(OpenCV REQUIRED) +FILE(GLOB PIFPAF_DECODER + src/pifpaf_decoder/*.cpp) + ADD_LIBRARY( ${POSE_LIB_NAME} # SHARED src/logging.cpp src/tensorrt.cpp + src/pifpaf.cpp + ${PIFPAF_DECODER} src/paf.cpp src/data.cpp src/stream.cpp diff --git a/cmake/hyperpose.fake.cmake b/cmake/hyperpose.fake.cmake index 07ae7c1c..7aa1ce0c 100644 --- a/cmake/hyperpose.fake.cmake +++ b/cmake/hyperpose.fake.cmake @@ -12,7 +12,11 @@ ADD_LIBRARY( src/stream.cpp src/thread_pool.cpp src/pose_proposal.cpp - src/human.cpp) + src/human.cpp + src/pifpaf.cpp + src/pifpaf_decoder/math_helpers.cpp + src/pifpaf_decoder/openpifpaf_postprocessor.cpp +) TARGET_LINK_LIBRARIES( ${POSE_LIB_NAME} diff --git a/examples/cli.cpp b/examples/cli.cpp index 80a54aa4..dc4dd9f5 100644 --- a/examples/cli.cpp +++ b/examples/cli.cpp @@ -9,16 +9,17 @@ #define kSTREAM "stream" #define kPAF "paf" #define kPPN "ppn" +#define kPIFPAF "pifpaf" // Model Configuration. DEFINE_string(model, "../data/models/TinyVGG-V1-HW=256x384.uff", "Path to the model."); DEFINE_string( post, kPAF, - "Post-processing method. (`" kPAF "` -> [Part Affine Field] or `" kPPN "` -> [Pose Proposal Network])"); + "Post-processing method. (`" kPAF "` -> [Part Affine Field] or `" kPPN "` -> [Pose Proposal Network]) or `" kPIFPAF "` -> [Pif Paf]"); DEFINE_int32(w, 384, "Width of input image."); DEFINE_int32(h, 256, "Height of input image."); -DEFINE_int32(max_batch_size, 8, "Max batch size for inference engine to execute."); +DEFINE_int32(max_batch_size, 4, "Max batch size for inference engine to execute."); // Execution Mode DEFINE_bool(imshow, true, "Whether to open an imshow window."); @@ -37,18 +38,19 @@ namespace hp = hyperpose; class parser_variant { public: + using var_t = std::variant; template std::vector process(Container&& feature_map_containers) { return std::visit([&feature_map_containers](auto& arg) { return arg.process(feature_map_containers); }, m_parser); } - parser_variant(std::variant v) + parser_variant(var_t v) : m_parser(std::move(v)) { } private: - std::variant m_parser; + var_t m_parser; }; //parser_variant parser{parser}; @@ -142,14 +144,17 @@ int main(int argc, char** argv) }(); cli_log() << "DNN engine is built.\n"; - auto parser = parser_variant{ [&engine]() -> std::variant { + auto parser = parser_variant{ [&engine]() -> parser_variant::var_t { if (FLAGS_post == kPAF) return hp::parser::paf{}; if (FLAGS_post == kPPN) return hp::parser::pose_proposal(engine.input_size()); - cli_log() << "ERROR: Unknown post-processing flag: `" << FLAGS_post << "`. Use `paf` or `ppn` please.\n"; + if (FLAGS_post == kPIFPAF) + return hp::parser::pifpaf(engine.input_size().height, engine.input_size().width); + + cli_log() << "ERROR: Unknown post-processing flag: `" << FLAGS_post << "`. Use `paf`, `ppn` or `pifpaf` please.\n"; std::exit(-1); }() }; @@ -179,6 +184,7 @@ int main(int argc, char** argv) if (FLAGS_runtime == kOPERATOR) { if (images.empty()) { // For CAP. + auto beg = clk_t::now(); auto writer = make_writer(); while (cap.isOpened()) { cv::Mat mat; @@ -222,6 +228,9 @@ int main(int argc, char** argv) break; } } + auto inference_time = std::chrono::duration(clk_t::now() - beg).count(); + std::cout << cap.get(cv::CAP_PROP_FRAME_COUNT) << " images got processed in " << inference_time << " ms, FPS = " + << 1000. 
* cap.get(cv::CAP_PROP_FRAME_COUNT) / inference_time << '\n'; } else { // For Vec. auto beg = clk_t::now(); // * TensorRT Inference. diff --git a/examples/gen_serialized_engine.example.cpp b/examples/gen_serialized_engine.example.cpp index f172a930..3f09dd98 100644 --- a/examples/gen_serialized_engine.example.cpp +++ b/examples/gen_serialized_engine.example.cpp @@ -12,7 +12,7 @@ DEFINE_string(output_name_list, "outputs/conf,outputs/paf", "The output node nam DEFINE_int32(input_height, 256, "Height of input image."); DEFINE_int32(input_width, 384, "Width of input image."); -DEFINE_int32(max_batch_size, 32, "The max batch size for the exported serialized model."); +DEFINE_int32(max_batch_size, 1, "The max batch size for the exported serialized model."); DEFINE_string(output_model, "", "Path to output serialized model."); diff --git a/examples/operator_api_batched_images_pifpaf.example.cpp b/examples/operator_api_batched_images_pifpaf.example.cpp new file mode 100644 index 00000000..5f57fa11 --- /dev/null +++ b/examples/operator_api_batched_images_pifpaf.example.cpp @@ -0,0 +1,77 @@ +#include "utils.hpp" +#include +#include +#include + +// Model flags +DEFINE_string(model_file, "../data/models/openpifpaf-resnet50.onnx", "Path to the model."); + +DEFINE_bool(logging, false, "Print the logging information or not."); +DEFINE_int32(input_height, 640, "Height of input image."); +DEFINE_int32(input_width, 427, "Width of input image."); + +DEFINE_string(input_folder, "../data/media", "Folder of images to inference."); + +int main(int argc, char** argv) +{ + gflags::ParseCommandLineFlags(&argc, &argv, true); + + // * Collect data into batch. + std::vector batch = glob_images(FLAGS_input_folder); + + if (batch.empty()) { + example_log() << "No input images got. Exiting.\n"; + exit(-1); + } + + example_log() << "Batch shape: [" << batch.size() << ", 3, " << FLAGS_input_height << ", " << FLAGS_input_width << "]\n"; + + // * Create TensorRT engine. + namespace hp = hyperpose; + if (FLAGS_logging) + hp::enable_logging(); + + auto engine = [&] { + using namespace hp::dnn; + constexpr std::string_view onnx_suffix = ".onnx"; + constexpr std::string_view uff_suffix = ".uff"; + + if (std::equal(onnx_suffix.crbegin(), onnx_suffix.crend(), FLAGS_model_file.crbegin())) + return tensorrt(onnx{ FLAGS_model_file }, { FLAGS_input_width, FLAGS_input_height }, batch.size()); + + example_log() << "Your model file's suffix is not [.onnx | .uff]. Your model file path: " << FLAGS_model_file; + example_log() << "Trying to be viewed as a serialized TensorRT model."; + + return tensorrt(tensorrt_serialized{ FLAGS_model_file }, { FLAGS_input_width, FLAGS_input_height }, batch.size()); + }(); + + hp::parser::pifpaf parser(engine.input_size().height, engine.input_size().width); + + using clk_t = std::chrono::high_resolution_clock; + auto beg = clk_t::now(); + { + // * TensorRT Inference. + auto feature_map_packets = engine.inference(batch); + for (const auto& packet : feature_map_packets) + for (const auto& feature_map : packet) + example_log() << feature_map << std::endl; + + // * Paf. + std::vector> pose_vectors; + pose_vectors.reserve(feature_map_packets.size()); + for (auto&& packet : feature_map_packets) { + pose_vectors.push_back(parser.process(packet[0], packet[1])); + } + + std::cout << batch.size() << " images got processed. FPS = " + << 1000. 
* batch.size() / std::chrono::duration(clk_t::now() - beg).count() + << '\n'; + + for (size_t i = 0; i < batch.size(); ++i) { + cv::resize(batch[i], batch[i], { FLAGS_input_width, FLAGS_input_height }); + for (auto&& pose : pose_vectors[i]) + hp::draw_human(batch[i], pose); + cv::imwrite("output_" + std::to_string(i) + ".png", batch[i]); + } + } +} \ No newline at end of file diff --git a/include/hyperpose/hyperpose.hpp b/include/hyperpose/hyperpose.hpp index a851c758..b9df8fcb 100644 --- a/include/hyperpose/hyperpose.hpp +++ b/include/hyperpose/hyperpose.hpp @@ -9,6 +9,7 @@ #include "operator/dnn/tensorrt.hpp" #include "operator/parser/paf.hpp" +#include "operator/parser/pifpaf.hpp" #include "operator/parser/proposal_network.hpp" #include "stream/stream.hpp" \ No newline at end of file diff --git a/include/hyperpose/operator/parser/pifpaf.hpp b/include/hyperpose/operator/parser/pifpaf.hpp new file mode 100644 index 00000000..41eb83de --- /dev/null +++ b/include/hyperpose/operator/parser/pifpaf.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include "../../utility/data.hpp" +#include "paf.hpp" + +namespace hyperpose::parser { + +class pifpaf { +public: + inline explicit pifpaf(int h, int w, float thresh = 0.1) + : m_net_h(h) + , m_net_w(w) + , m_keypoint_thresh(thresh){}; + std::vector process(const feature_map_t& pif, const feature_map_t& paf); + template + std::vector process(C&& feature_map_containers) + { + // 1@pif, 2@paf. + assert(feature_map_containers.size() == 2); + return process(feature_map_containers[0], feature_map_containers[1]); + } + +private: + int m_net_w, m_net_h; + float m_keypoint_thresh; +}; + +} // namespace hyperpose \ No newline at end of file diff --git a/src/human.cpp b/src/human.cpp index dfc46893..7473a116 100644 --- a/src/human.cpp +++ b/src/human.cpp @@ -7,7 +7,7 @@ namespace hyperpose { void draw_human(cv::Mat& img, const human_t& human) { float n = 1, s = 0, w = 1, e = 0; - for(const auto& p : human.parts) + for (const auto& p : human.parts) if (p.has_value) { n = std::min(n, p.y); s = std::max(s, p.y); diff --git a/src/pifpaf.cpp b/src/pifpaf.cpp new file mode 100644 index 00000000..4bfc7064 --- /dev/null +++ b/src/pifpaf.cpp @@ -0,0 +1,97 @@ +#include "pifpaf_decoder/openpifpaf_postprocessor.hpp" +#include + +namespace hyperpose::parser { + +// TODO: Name ORDER! +std::vector pifpaf::process(const feature_map_t& paf, const feature_map_t& pif) +{ + // Helpful links (Chinese):: + // https://zhuanlan.zhihu.com/p/93896207 + // https://zhuanlan.zhihu.com/p/68073113 + // pif: [17, 5, h, w] => KEY POINTS; + // 5: [conf, dx, dy, b, scale] + // Example: array([ 0.00527313, 0.13620843, -0.32253477, 0.3263721 , 0.90980804], dtype=float32) + // heat map: f(x, y) = \sum_ij conf * N(x, y|ij) + // paf: [19, 9, h, w] => LIMBS; + // 9: [conf, [x1, y1, x2, y2], [b1, b2], [s1, s2]] + // Example: [ 0.00712654, -0.54057586, 5.4075847 , 3.0354404 , 3.1246614 , 1.0621283 , -3.5857565 , 2.6072054 , 3.8406293 ], + // TODO: OPTIMIZE THIS. 
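+    // Flattened indexing (illustrative; it mirrors the strides used in
+    // openpifpaf_postprocessor.cpp): for keypoint k and cell (y, x),
+    //   confidence -> pif[k*5*h*w + 0*h*w + y*w + x]
+    //   dx, dy     -> same index + 1*h*w and + 2*h*w
+    //   scale      -> same index + 4*h*w
+    // The paf tensor is laid out the same way, with 9 channels per bone.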
+ + lpdnn::aiapp_impl::OpenPifPafPostprocessor pp; + pp.keypointThreshold = m_keypoint_thresh; + size_t h = pif.shape()[pif.shape().size() - 2]; + size_t w = pif.shape().back(); + + std::vector pif_vec{}, paf_vec{}; + + const auto raw_copy = [](const feature_map_t& tensor, std::vector& vec) { + size_t d0 = tensor.shape()[0]; + size_t d1 = tensor.shape()[1]; + size_t h = tensor.shape()[2]; + size_t w = tensor.shape()[3]; + const size_t total_size = d0 * d1 * h * w; + vec.reserve(total_size); + for (size_t i = 0; i < total_size; ++i) { + vec.push_back(tensor.view()[i]); + } + }; + + raw_copy(pif, pif_vec); + raw_copy(paf, paf_vec); + + // TODO: RECOVER THE INP{W, H}; + auto apires = pp.postprocess(m_net_w, m_net_h, w, h, pif_vec, paf_vec); + + std::vector ret{}; + ret.reserve(apires.items.size()); + // OpenPifPaf COCO Topology: https://miro.medium.com/max/366/0*KFrFQVj3OoGAtt6o.png + // HyperPose: Unified Topology + // NOTE: This step is to convert pifpaf topology to hyperpose topology. + + for (auto&& item : apires.items) { + if (item.landmarks.points.empty()) + continue; + human_t man{}; + man.score = item.confidence; + + auto p2p = [this](const auto& src, auto& dst) { + if (src.confidence > 0.) { + dst.score = 1; // src.confidence; FIXME + dst.x = src.position.x / (float)m_net_w; + dst.y = src.position.y / (float)m_net_h; + dst.has_value = true; + } + }; + + auto& from = item.landmarks.points; + auto& to = man.parts; + // OpenPifPaf -> HyperPose + p2p(from[0], to[0]); + // ! to [1] + constexpr std::array from_index = { + 6, 8, 10, 5, 7, 9, + 12, 14, 16, 11, 13, 15, + 2, 1, 4, 3 + }; + + for (size_t i = 0; i < from_index.size(); ++i) { + p2p(from[from_index[i]], to[i + 2]); + } + + if (to[2].has_value && to[5].has_value) { + to[1].x = (to[2].x + to[5].x) / 2; + ; + to[1].y = (to[2].y + to[5].y) / 2; + ; + to[1].has_value = true; + to[1].score = (to[2].score + to[5].score) / 2; + } + + ret.push_back(man); + } + + return ret; +} + +} // namespace hyperpose \ No newline at end of file diff --git a/src/pifpaf_decoder/aiapp.hpp b/src/pifpaf_decoder/aiapp.hpp new file mode 100644 index 00000000..85c75a09 --- /dev/null +++ b/src/pifpaf_decoder/aiapp.hpp @@ -0,0 +1,118 @@ +/// +/// Ai-app base interface and types +/// +/// \copyright 2018 NVISO SA. All rights reserved. +/// \license This project is released under the XXXXXX License. +/// + +#pragma once + +#include +#include +#include +#include + +namespace lpdnn { +namespace ai_app { + + /// Aiapp Blob + /// This could be improved to allow referring to existing data + /// thus avoding unneeded data-copy, for example by using shared_ptr. + struct Blob { + /// Data dimensions. Mandatory if the blob represents a tensor. + std::vector dim; + + /// Data. Mandatory if the blob represents a tensor. + std::vector data; + + /// Optional raw representation. + std::vector raw; + + /// Optional CBOR representation when data is structured. + std::vector cbor; + + /// Optional additional information + /// (eg, description of internal representation: "NCHW,8bits,dp3"). + std::string info; + }; + + /// AI-App interface + class Aiapp { + public: + virtual ~Aiapp() {} + + /// @return the ai-class id for this aiapp + virtual const char* class_id() const = 0; + + /// @return the implementation id for this aiapp + virtual const char* impl_id() const = 0; + + /// Initialization options + /// \param cfg: configuration string, typically in JSON format. 
+ /// \return: true if success + virtual bool init(const std::string& cfg) = 0; + + /// Set runtime options for the specified component + /// \param opt: runtime options, typically in JSON format. + /// \param name: subcomponent name + /// \return: true if success + virtual bool set_options(const std::string& opt, + const std::string& name = "") + = 0; + + /// Introspection methods + /// \{ + + /// \return: names of all direct subcomponents of the specified component + virtual std::vector components( + const std::string& name = "") const = 0; + + /// \return output(s) of the specified component + virtual std::vector output(const std::string& name = "") const = 0; + + /// \return metrics of the specified component and all its subcomponents + virtual std::string metrics(const std::string& name = "") const = 0; + + /// set end-of-execution at the end of the specified component + /// if name is empty any exit-point previously set is removed + virtual bool set_exit_after(const std::string& name = "") = 0; + + /// \} + }; + + /// AiApp standard processing components + /// Each ai-app can contain other sub-components. + /// Each subcomponent can be identified by a pathname, for example: + /// "preprocessing.normalize" + /// "inference.net1.conv23" + struct Component { + /// Standard component names. Their use is not mandatory but + /// allows an ai-app to be supported by existing tools. + static constexpr char const* preprocessing = "preprocessing"; + static constexpr char const* inference = "inference"; + static constexpr char const* postprocessing = "postprocessing"; + + /// Ai-app interface parameters + static constexpr char const* interface = "interface"; + + /// Name separator in a component pathname string. + /// Component names can't contain the separator except possibly for the leafs + static constexpr char separator = '.'; + + /// Concatenate component names in a component pathname + static std::string join(const std::string& path, const std::string& comp) + { + return path + separator + comp; + } + }; + + /// AiApp Metrics + struct Metrics { + /// Standard metrics. All timings are in microseconds. + static constexpr char const* init_time = "init_time"; + static constexpr char const* inference_time = "inference_time"; + static constexpr char const* inference_cpu_time = "inference_cpu_time"; + }; + +} // namespace ai_app +} // namespace lpdnn diff --git a/src/pifpaf_decoder/image_based.hpp b/src/pifpaf_decoder/image_based.hpp new file mode 100644 index 00000000..914e0f7c --- /dev/null +++ b/src/pifpaf_decoder/image_based.hpp @@ -0,0 +1,152 @@ +/// +/// Ai-app interface and types for image-based ai-apps +/// +/// \copyright 2018 NVISO SA. All rights reserved. +/// \license This project is released under the XXXXXX License. +/// + +#pragma once + +#include "aiapp.hpp" + +namespace lpdnn { +namespace ai_app { + + /// 2-dimensional size + struct Dim2d { + int x; + int y; + }; + + /// Rectangle + struct Rect { + Dim2d origin; + Dim2d size; + + bool empty() const { return size.x <= 0 || size.y <= 0; } + }; + + /// Landmarks + struct Landmark { + Dim2d position; + float confidence; /// Negative value if N/A + }; + + struct Landmarks { + /// Landmark specification identifier + std::string type; + /// Landmark points + std::vector points; + }; + + /// Image representation. + /// The data of a RAW image consists of *y scanlines of *x pixels, + /// with each pixel consisting of N interleaved 8-bit components; the first + /// pixel pointed to is top-left-most in the image. 
There is no padding between + /// image scanlines or between pixels, regardless of format. The number of + /// components N is 3 for RGB images, 4 for RGBA, 1 for grayscale. + /// Support for 8bits RGB format is MANDATORY for all image-processing AiApps. + /// An image can be constructed from a std::vector, or a std::string + /// or raw data pointer and size. When passing rvalues vector or strings, the + /// image will take ownership of the data, otherwise will just keep reference. + class Image { + protected: + /// Contains image data if we have ownership of it + std::vector _image_content; + + public: + /// Image format + enum class Format { + raw_grayscale = 1, /// 8bits grayscale + raw_rgb8 = 3, /// 8bits RGB *MANDATORY* + raw_rgba8 = 4, /// 8bits RGBA + + encoded = 256, /// Standard JPEG/BMP/PNG/TIFF format + + custom = 512 /// Custom format. Use attributes field for more details. + }; + + /// Don't take data ownership. + /// img_dim parameter can be omitted in case of encoded images since + /// this information will be extracted from the image content itself. + Image(Format img_format, const std::vector& data, Dim2d img_dim = {}) + : Image(img_format, data.data(), data.size(), img_dim) + { + } + + /// Take data ownership + Image(Format img_format, std::vector&& data, Dim2d img_dim = {}) + : _image_content(std::move(data)) + , format{ img_format } + , dim(img_dim) + , data{ _image_content.data() } + , data_size{ _image_content.size() } + { + } + + /// Don't take data ownership. + Image(Format img_format, const std::string& data, Dim2d img_dim = {}) + : Image(img_format, (uint8_t*)data.c_str(), data.size(), img_dim) + { + } + + /// Take data ownership + Image(Format img_format, std::string&& data, Dim2d img_dim = {}) + : Image(img_format, + std::vector((uint8_t*)data.c_str(), + (uint8_t*)data.c_str() + data.size()), + img_dim) + { + data.clear(); + } + + /// Don't take data ownership + /// img_data_size is mandatory in case of encoded images. + Image(Format img_format, const uint8_t* img_data, size_t img_data_size, + Dim2d img_dim = {}) + : format{ img_format } + , dim(img_dim) + , data{ img_data } + , data_size{ img_data_size } + { + } + + /// Utility factory methods + static Image encoded(const std::vector& data) + { + return Image(Format::encoded, data); + } + + /// Image format + Format format; + + /// Image dimensions (for raw images) + Dim2d dim; + + /// Region of interest inside the image (all if empty) + Rect roi{}; + + /// Custom attributes. + /// This is ai-app specific and allows to specify custom data formats. + std::string attributes; + + /// Pointer to image data (no ownership of the data). + const uint8_t* data; + + /// Size of image data. Mandatory for encoded images. + size_t data_size; + + /// Additional optional information about the image. + /// May be required by some aiapps. 
+ Landmarks landmarks; + }; + + /// Abstract image-based AiApp + class Image_based : virtual public Aiapp { + public: + /// @return supported image formats (ordered by preference) + virtual std::vector image_formats() const = 0; + }; + +} // namespace ai_app +} // namespace lpdnn diff --git a/src/pifpaf_decoder/math_helpers.cpp b/src/pifpaf_decoder/math_helpers.cpp new file mode 100644 index 00000000..f7634da6 --- /dev/null +++ b/src/pifpaf_decoder/math_helpers.cpp @@ -0,0 +1,25 @@ +#include "math_helpers.hpp" + +void vfill(float* x, unsigned long n, float v) +{ + // Slow version + for (unsigned long i = 0; i < n; ++i) { + x[i] = v; + } +} + +void vmul(const float* a, const float* b, float* c, unsigned long n) +{ + // Slow version + for (unsigned long i = 0; i < n; ++i) { + c[i] = a[i] * b[i]; + } +} + +void vsmul(const float* a, float b, float* c, unsigned long n) +{ + // Slow version + for (unsigned long i = 0; i < n; ++i) { + c[i] = a[i] * b; + } +} diff --git a/src/pifpaf_decoder/math_helpers.hpp b/src/pifpaf_decoder/math_helpers.hpp new file mode 100644 index 00000000..15dcb087 --- /dev/null +++ b/src/pifpaf_decoder/math_helpers.hpp @@ -0,0 +1,10 @@ +#pragma once + +// x[i] = v +void vfill(float* x, unsigned long n, float v); + +// c[i] = a[i] * b[i] +void vmul(const float* a, const float* b, float* c, unsigned long n); + +// c[i] = a[i] * b +void vsmul(const float* a, float b, float* c, unsigned long n); diff --git a/src/pifpaf_decoder/object_detection.hpp b/src/pifpaf_decoder/object_detection.hpp new file mode 100644 index 00000000..91c8f3c0 --- /dev/null +++ b/src/pifpaf_decoder/object_detection.hpp @@ -0,0 +1,50 @@ +/// +/// Ai-app interface for object detection +/// +/// \copyright 2018 NVISO SA. All rights reserved. +/// \license This project is released under the XXXXXX License. +/// + +#pragma once + +#include "image_based.hpp" + +namespace lpdnn { +namespace ai_app { + + /// Object detection AiApp + class Object_detection : virtual public Image_based { + public: + struct Result { + struct Item { + float confidence; + int class_index; + Rect bounding_box; + Landmarks landmarks; + }; + + bool success{}; + std::vector items; + }; + + /// Set minimum detectable object size + /// @return true if success + virtual bool set_min_size(Dim2d minSize) = 0; + + /// Set maximum detectable object size + /// @return true if success + virtual bool set_max_size(Dim2d maxSize) = 0; + + /// Perform inference. + virtual Result execute(const Image& input) = 0; + + /// @return Names of classes + virtual std::vector classes() = 0; + + /// @return our aiapp class id + const char* class_id() const override { return ai_class_id; } + static constexpr char const* ai_class_id = "com_bonseyes::object_detection"; + }; + +} // namespace ai_app +} // namespace lpdnn diff --git a/src/pifpaf_decoder/openpifpaf_postprocessor.cpp b/src/pifpaf_decoder/openpifpaf_postprocessor.cpp new file mode 100644 index 00000000..d0a52617 --- /dev/null +++ b/src/pifpaf_decoder/openpifpaf_postprocessor.cpp @@ -0,0 +1,930 @@ +// Heavily modified from openpifpaf/cpp/example. 
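+// Decoding pipeline, roughly: targetIntensities() accumulates the per-keypoint
+// CIF fields into a high-resolution confidence map; postprocess() then collects
+// seed keypoints above seedThreshold, scores the CAF (limb) fields in forward
+// and backward direction, greedily grows a skeleton from each seed with a
+// priority-queue frontier (grow / growConnectionBlend), marks grown joints in an
+// occupancy map to suppress duplicate detections, and finally thresholds, sorts
+// and converts the surviving annotations into Object_detection::Result items.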
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "math_helpers.hpp" +#include "openpifpaf_postprocessor.hpp" + +struct Occupancy { + // self.reduction = reduction + // self.min_scale_reduced = min_scale / reduction + constexpr static float reduction = 2.f; + constexpr static float min_scale_reduced = 4.f / reduction; + size_t d0, d1, d2; // c h w + std::vector occupancy_view; + + Occupancy(size_t d0_, size_t d1_, size_t d2_) + : d0(d0_) + , d1(d1_) + , d2(d2_) + , occupancy_view(d0_ * d1_ * d2_) + { + } + + bool fuzz_get(size_t f, float y, float x) + { + if (f >= d0) + return true; + + // scalar_nonzero_clipped_with_reduction + float xx = std::min((float)d2 - 1, std::max(0.f, x / reduction)); + float yy = std::min((float)d1 - 1, std::max(0.f, y / reduction)); + + return get(f, yy, xx); + } + + bool get(size_t d0_, size_t d1_, size_t d2_) + { + return occupancy_view[(d1 * d2) * d0_ + d2 * d1_ + d2_]; + } + + void set(size_t d0_, size_t d1_, size_t d2_) + { + occupancy_view[(d1 * d2) * d0_ + d2 * d1_ + d2_] = 1; + } +}; + +namespace lpdnn { +namespace aiapp_impl { + + constexpr int OpenPifPafPostprocessor::bones[19][2] = { + { 16, 14 }, + { 14, 12 }, + { 17, 15 }, + { 15, 13 }, + { 12, 13 }, + { 6, 12 }, + { 7, 13 }, + { 6, 7 }, + { 6, 8 }, + { 7, 9 }, + { 8, 10 }, + { 9, 11 }, + { 2, 3 }, + { 1, 2 }, + { 1, 3 }, + { 2, 4 }, + { 3, 5 }, + { 4, 6 }, + { 5, 7 }, + }; + + struct to_point { + int field_id; + bool possitve; + }; + + auto BY_SOURCE_MAP = [] { + // print(self.by_source) + // for i in range(17): + // for (end_i), (caf_i, connect) in self.by_source[i].items(): + // data = f'to_point{{{caf_i}, {"true" if connect else "false"}}}' + // print(f'smap[{i}][{end_i}] = {data};') + std::array>, 17> smap; + smap[0][1] = to_point{ 13, true }; + smap[0][2] = to_point{ 14, true }; + smap[1][2] = to_point{ 12, true }; + smap[1][0] = to_point{ 13, false }; + smap[1][3] = to_point{ 15, true }; + smap[2][1] = to_point{ 12, false }; + smap[2][0] = to_point{ 14, false }; + smap[2][4] = to_point{ 16, true }; + smap[3][1] = to_point{ 15, false }; + smap[3][5] = to_point{ 17, true }; + smap[4][2] = to_point{ 16, false }; + smap[4][6] = to_point{ 18, true }; + smap[5][11] = to_point{ 5, true }; + smap[5][6] = to_point{ 7, true }; + smap[5][7] = to_point{ 8, true }; + smap[5][3] = to_point{ 17, false }; + smap[6][12] = to_point{ 6, true }; + smap[6][5] = to_point{ 7, false }; + smap[6][8] = to_point{ 9, true }; + smap[6][4] = to_point{ 18, false }; + smap[7][5] = to_point{ 8, false }; + smap[7][9] = to_point{ 10, true }; + smap[8][6] = to_point{ 9, false }; + smap[8][10] = to_point{ 11, true }; + smap[9][7] = to_point{ 10, false }; + smap[10][8] = to_point{ 11, false }; + smap[11][13] = to_point{ 1, false }; + smap[11][12] = to_point{ 4, true }; + smap[11][5] = to_point{ 5, false }; + smap[12][14] = to_point{ 3, false }; + smap[12][11] = to_point{ 4, false }; + smap[12][6] = to_point{ 6, false }; + smap[13][15] = to_point{ 0, false }; + smap[13][11] = to_point{ 1, true }; + smap[14][16] = to_point{ 2, false }; + smap[14][12] = to_point{ 3, true }; + smap[15][13] = to_point{ 0, true }; + smap[16][14] = to_point{ 2, true }; + return smap; + }(); + + static const int C = 17; + static const float STRIDE = 8.0f; + static const float seedThreshold = 0.3f; // 0.5 + //static const float keypointThreshold = 0.15f; + static const float instanceThreshold = 0.2f; + + static void 
scalarSquareAddConstant(float* field, + int fieldH, + int fieldW, + const std::vector& x, + const std::vector& y, + const std::vector& width, + const std::vector& v) + { + // minx_np = np.round(x_np - width_np).astype(np.int) + // minx_np = np.clip(minx_np, 0, field.shape[1] - 1) + std::vector minx(x.size()); + for (size_t i = 0; i < x.size(); ++i) { + minx[i] = std::min(fieldW - 1, std::max(0, (int)std::round(x[i] - width[i]))); + } + + // miny_np = np.round(y_np - width_np).astype(np.int) + // miny_np = np.clip(miny_np, 0, field.shape[0] - 1) + std::vector miny(y.size()); + for (size_t i = 0; i < y.size(); ++i) { + miny[i] = std::min(fieldH - 1, std::max(0, (int)std::round(y[i] - width[i]))); + } + + // maxx_np = np.round(x_np + width_np).astype(np.int) + // maxx_np = np.clip(maxx_np + 1, minx_np + 1, field.shape[1]) + std::vector maxx(x.size()); + for (size_t i = 0; i < x.size(); ++i) { + maxx[i] = std::min(fieldW, std::max(minx[i] + 1, (int)std::round(x[i] + width[i]) + 1)); + } + + // maxy_np = np.round(y_np + width_np).astype(np.int) + // maxy_np = np.clip(maxy_np + 1, miny_np + 1, field.shape[0]) + std::vector maxy(y.size()); + for (size_t i = 0; i < y.size(); ++i) { + maxy[i] = std::min(fieldH, std::max(miny[i] + 1, (int)std::round(y[i] + width[i]) + 1)); + } + + // for i in range(minx.shape[0]): + // for xx in range(minx[i], maxx[i]): + // for yy in range(miny[i], maxy[i]): + // field[yy, xx] += v[i] + for (size_t i = 0; i < minx.size(); ++i) { + for (int yy = miny[i]; yy < maxy[i]; ++yy) { + for (int xx = minx[i]; xx < maxx[i]; ++xx) { + field[yy * fieldW + xx] += v[i]; + } + } + } + } + + static void scalarSquareAddGaussWitMax(float* field, + int fieldH, + int fieldW, + const std::vector& x, + const std::vector& y, + const std::vector& sigma_, + const std::vector& v, + float truncate, + float max_val = 1.0f) + { + // // ganler! + // assert(v.size() == x.size() == y.size() == sigma_.size()); + for (size_t i = 0; i < x.size(); ++i) { + float csigma = sigma_[i]; + float truncate_csigma = csigma * truncate; + float cx = x[i]; + float cy = y[i]; + float cv = v[i]; + const auto clip = [](float val, float low, float high) { + return std::max(low, std::min(high, val)); + }; + + // printf("%f, %f, %f, %f, %f\n", cx, cy, csigma, truncate_csigma, max_val); + const int64_t minx = clip(cx - truncate_csigma, 0, fieldW - 1); + const int64_t maxx = clip(cx + truncate_csigma + 1, minx + 1, fieldW); + const int64_t miny = clip(cy - truncate_csigma, 0, fieldH - 1); + const int64_t maxy = clip(cy + truncate_csigma + 1, miny + 1, fieldH); + // std::cout << minx << '\t' << maxx << '\t' << miny << '\t' << maxy << '\n'; + // printf("%lli, %lli, %lli, %lli\n", minx, maxx, miny, maxy); + + for (int64_t xx = minx; xx < maxx; ++xx) { + float deltax2 = (xx - cx) * (xx - cx); + for (int64_t yy = miny; yy < maxy; ++yy) { + float deltay2 = (yy - cy) * (yy - cy); + + if (deltax2 + deltay2 > truncate_csigma * truncate_csigma) { + continue; + } + + const auto approx_exp = [](float x) { + if (x > 2 || x < -2) + return 0.f; + x = 1.f + x / 8; + x *= x; + x *= x; + x *= x; + return x; + }; + float vv = (deltax2 < 0.25 && deltay2 < 0.25) ? 
cv : cv * approx_exp(-0.5 * (deltax2 + deltay2) / (csigma * csigma)); + field[yy * fieldW + xx] += vv; + field[yy * fieldW + xx] = std::min(max_val, field[yy * fieldW + xx]); + } + } + } + } + + static void scalarSquareAddSingle(Occupancy& field, + int field_idx, + int fieldH, + int fieldW, + float x, + float y, + float width, + float reduction = 1.0, + float min_scaled_reduced = 0.0) + { + if (reduction != 1.0) { + x /= reduction; + y /= reduction; + width = std::max(min_scaled_reduced, width / reduction); + } + + // minx = max(0, int(round(x - width))) + // miny = max(0, int(round(y - width))) + auto minx = std::min(fieldW - 1, std::max(0, (int)(x - width))); + auto miny = std::min(fieldH - 1, std::max(0, (int)(y - width))); + + // maxx = max(minx + 1, min(field.shape[1], int(round(x + width)) + 1)) + // maxy = max(miny + 1, min(field.shape[0], int(round(y + width)) + 1)) + auto maxx = std::min(fieldW, std::max(minx + 1, std::min(fieldW, (int)(x + width) + 1))); + auto maxy = std::min(fieldH, std::max(miny + 1, std::min(fieldH, (int)(y + width) + 1))); + + // field[miny:maxy, minx:maxx] += value + for (auto yy = miny; yy < maxy; ++yy) { + for (auto xx = minx; xx < maxx; ++xx) { + field.set(field_idx, yy, xx); + } + } + } + + OpenPifPafPostprocessor::Target_intensity + OpenPifPafPostprocessor::targetIntensities(const std::vector& pif, + float v_th, bool coreOnly) + { + constexpr float PIF_NN = 16.0f; + + const size_t targets_stride_0 = H_hr * W_hr; + const size_t scales_stride_0 = H_hr * W_hr; + const size_t ns_stride_0 = H_hr * W_hr; + + // These tensors need to be emptied out on each frame. + vfill(targetsCoreOnly.data(), targetsCoreOnly.size(), 0.0f); + vfill(targets.data(), targets.size(), 0.0f); + vfill(scales.data(), scales.size(), 0.0f); + vfill(ns.data(), ns.size(), 0.0f); + + std::vector v; + std::vector x; + std::vector y; + std::vector s; + + for (int i = 0; i < C; ++i) { + // Threshold pif[i, ...], which is a (4, h, w) tensor. Copy the values + // that are over the threshold into four vectors: v, x, y, s. Multiply + // x, y, s with the stride. + // + // v, x, y, s = p[:, p[0] > v_th] + // x = x * self.stride + // y = y * self.stride + // s = s * self.stride + v.clear(); + x.clear(); + y.clear(); + s.clear(); + const size_t pifOffset = i * pif_stride_0; + const size_t xOffset = pifOffset + pif_stride_1; + const size_t yOffset = xOffset + pif_stride_1; + const size_t sOffset = yOffset + pif_stride_1 * 2; + for (int j = 0; j < H * W; ++j) { + float p = pif[pifOffset + j]; + if (p > v_th) { + v.push_back(p); + x.push_back(pif[xOffset + j] * STRIDE); + y.push_back(pif[yOffset + j] * STRIDE); + s.push_back(std::max(1., 0.5 * pif[sOffset + j] * STRIDE)); + } + } + + /* + // For debugging + printf("iteration: %d\n", i); + printf("v:\n"); for (auto n : v) printf("%f, ", n); printf("\n"); + printf("x:\n"); for (auto n : x) printf("%f, ", n); printf("\n"); + printf("y:\n"); for (auto n : y) printf("%f, ", n); printf("\n"); + printf("s:\n"); for (auto n : s) printf("%f, ", n); printf("\n"); + */ + + // Create a high-resolution confidence map for this keypoint. + // std::cout << x.size() << '\t'<< y.size() << '\t'<< v.size() << '\t' << s.size() << '\n'; + // v / pif_nn + std::vector v_over_pif_nn(v.size()); + vsmul(v.data(), 1.0f / PIF_NN, v_over_pif_nn.data(), v.size()); + + // The original code computed the "core only" version in a separate step + // but that duplicates a bunch of work, so we do it at the same time. 
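+            // tco points at the i-th keypoint's (H_hr x W_hr) plane of the
+            // core-only map; the Gaussian accumulation caps each cell at max_val.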
+ const auto tco = targetsCoreOnly.data() + i * targets_stride_0; + scalarSquareAddGaussWitMax(tco, H_hr, W_hr, x, y, s, v_over_pif_nn, 1.0f, 1.0f); + + size_t cnt = 0; + for (size_t dd = 0; dd < targets_stride_0; ++dd) { + if (tco[dd] > 0.01) + ++cnt; + } + // std::cout << targets_stride_0 << '\t' << i << '\t'<< cnt << '\t' << tco[0] << '\n'; + + // s * v + std::vector s_times_v(v.size()); + vmul(s.data(), v.data(), s_times_v.data(), v.size()); + + const auto t = targets.data() + i * targets_stride_0; + const auto scale = scales.data() + i * scales_stride_0; + const auto n = ns.data() + i * ns_stride_0; + scalarSquareAddGaussWitMax(t, H_hr, W_hr, x, y, s, v_over_pif_nn, 1.0f); + scalarSquareAddConstant(scale, H_hr, W_hr, x, y, s, s_times_v); + scalarSquareAddConstant(n, H_hr, W_hr, x, y, s, v); + } + + // m = ns > 0 + // scales[m] = scales[m] / ns[m] + for (size_t i = 0; i < scales.size(); ++i) { + const auto d = ns[i]; + if (d > 0) { + scales[i] /= d; + } + } + return Target_intensity{ targets, scales, targetsCoreOnly }; + } + + std::tuple + OpenPifPafPostprocessor::growConnectionBlend(float x, float y, float s, const std::array, 9>& paf_field) + { + // # source value + // paf_field = paf_center(paf_field, xy[0], xy[1], sigma=2.0) + // if paf_field.shape[1] == 0: + // return 0, 0, 0 + const float sigma = 2.0 * s; + const float sigma2 = 0.25 * s * s; + size_t score_1_i = 0, score_2_i = 0; + float score_1 = 0, score_2 = 0; + + const int paf_stride = paf_field.front().size(); + for (int i = 0; i < paf_stride; ++i) { + if ((paf_field[1][i] < x - sigma) || (paf_field[1][i] > x + sigma) || (paf_field[2][i] < y - sigma) || (paf_field[2][i] > y + sigma)) + continue; + float d2 = (paf_field[1][i] - x) * (paf_field[1][i] - x) + (paf_field[2][i] - y) * (paf_field[2][i] - y); + float score = std::exp(-0.5 * d2 / sigma2) * paf_field[0][i]; + if (score >= score_1) { + score_2_i = score_1_i; + score_2 = score_1; + score_1_i = i; + score_1 = score; + } else if (score > score_2) { + score_2_i = i; + score_2 = score; + } + } + + if (score_1 == 0) + return { 0, 0, 0, 0 }; + + auto entry_1 = std::make_tuple(paf_field[3][score_1_i], paf_field[4][score_1_i], paf_field[6][score_1_i], paf_field[8][score_1_i]); + + auto [ex1, ey1, eb1, es1] = entry_1; + if (score_2 < 0.01 || score_2 < 0.5 * score_1) { + return { ex1, ey1, es1, score_1 * 0.5 }; + } + + // blend... + auto entry_2 = std::make_tuple(paf_field[3][score_2_i], paf_field[4][score_2_i], paf_field[6][score_2_i], paf_field[8][score_2_i]); + auto [ex2, ey2, eb2, es2] = entry_2; + + float blend_d2 = (ex1 - ex2) * (ex1 - ex2) + (ey1 - ey2) * (ey1 - ey2); + if (blend_d2 > ((es1 * es1) / 4)) { + return { ex1, ey1, es1, score_1 * 0.5 }; + } + + return { + // xysv + (score_1 * ex1 + score_2 * ex2) / (score_1 + score_2), + (score_1 * ey1 + score_2 * ey2) / (score_1 + score_2), + (score_1 * es1 + score_2 * es2) / (score_1 + score_2), + 0.5 * (score_1 + score_2), + }; + } + + using xysv = std::optional>; + + struct queue_item { // -score, xyv, start_i, end_i + template + queue_item(Args&&... 
args) + : data(std::make_tuple(std::forward(args)...)) + { + } + std::tuple data; + friend bool operator>(const queue_item& l, const queue_item& r) + { + return std::get<0>(l.data) >= std::get<0>(r.data); + } + friend bool operator<(const queue_item& l, const queue_item& r) + { + return std::get<0>(l.data) < std::get<0>(r.data); + } + }; + + void OpenPifPafPostprocessor::grow(Annotation& ann, + const FBContainer& pafForward, + const FBContainer& pafBackward) + { + // frontierActive = true; + // blockFrontier.clear(); + std::set> in_frontier{}; + std::priority_queue, std::greater> frontier; + + const auto add_to_frontier = [&](size_t start_i) { + for (const auto& [end_i, to_p] : BY_SOURCE_MAP[start_i]) { + int caf_i = to_p.field_id; + // std::cout << "----> " << start_i << '\t' << end_i << '\t' << caf_i << '\n'; + if (ann.keypoints[3 * end_i + 2] > 0.0) { + // std::cout << "CONTINUE start_i = " << start_i << '\n'; + continue; + } + // found! + if (in_frontier.cend() != in_frontier.find(std::make_pair(start_i, end_i))) { + // std::cout << "CONTINUE map already got you!\n"; + continue; + } + + float max_possible_score = std::sqrt(ann.keypoints[3 * start_i + 2]); + // std::cout << "put " << start_i << ' ' << end_i << "\tscore = " << max_possible_score << "\n"; + frontier.emplace(-max_possible_score, std::nullopt, start_i, end_i); + in_frontier.emplace(start_i, end_i); + } + }; + + const auto frontier_get = [&]() -> std::optional { + while (!frontier.empty()) { + auto entry = frontier.top(); + frontier.pop(); + + { + auto [_a, _b, start_i, end_i] = entry.data; + // std::cout << "POP " << start_i << ' ' << end_i << " has val = " << std::get<1>(entry.data).has_value() << '\n'; + } + + if (std::get<1>(entry.data).has_value()) { + // std::cout << "RETURN \n"; + return entry; + } + + auto [_a, _b, start_i, end_i] = entry.data; + if (ann.keypoints[end_i * 3 + 2] > 0.0) + continue; + + // connection_value(self, ann, caf_scored, start_i, end_i, *, reverse_match=True): + auto new_xysv = [&](size_t start_i, size_t end_i) -> xysv { + const auto& point = BY_SOURCE_MAP[start_i][end_i]; + int caf_i = point.field_id; + bool is_forward = point.possitve; + const auto& caf_f = is_forward ? pafForward[caf_i] : pafBackward[caf_i]; // [19, 9, N] + const auto& caf_b = is_forward ? 
pafBackward[caf_i] : pafForward[caf_i]; + auto [x, y, v] = std::make_tuple(ann.keypoints[start_i * 3], ann.keypoints[start_i * 3 + 1], ann.keypoints[start_i * 3 + 2]); + float xy_scale_s = std::max(0.f, ann.jointScales[start_i]); + const auto [nx, ny, ns, nv] = growConnectionBlend(x, y, xy_scale_s, caf_f); + // std::cout << "NEW:\t" << nx << '\t' << ny << '\t' << ns << '\t' << nv << '\n'; + + if (nv == 0) + return std::nullopt; + + float keypoint_score = std::sqrt(nv * v); + if (keypoint_score < keypointThreshold) + return std::nullopt; + // Use relative threashold + constexpr float keypoint_threshold_rel = 0.5; + if (keypoint_score < v * keypoint_threshold_rel) + return std::nullopt; + + float xy_scale_t = std::max(0.f, ns); + // if self.reverse_match and reverse_match -> true + const auto [rx, ry, rs, rv] = growConnectionBlend(nx, ny, xy_scale_t, caf_b); + // std::cout << "REVERSE:\t" << rx << '\t' << ry << '\t' << rs << '\t' << rv << '\n'; + if (rs == 0 || std::abs(x - rx) + std::abs(y - ry) > xy_scale_s) + return std::nullopt; + + return std::make_tuple(nx, ny, ns, keypoint_score); + }(start_i, end_i); + + if (std::nullopt == new_xysv) + continue; + + frontier.emplace(-std::get<3>(new_xysv.value()), new_xysv, start_i, end_i); + } + return std::nullopt; + }; + + for (size_t joint_i = 0; joint_i < N_PIFPAF_KEYPOINTS; ++joint_i) { + if (ann.keypoints[3 * joint_i + 2] != 0.0) { + // std::cout << "-----joint_i " << joint_i << "\n"; + add_to_frontier(joint_i); + } + } + + while (true) { + auto entry = frontier_get(); + if (!entry.has_value()) + break; + + auto [_, new_xysv, jsi, jti] = entry.value().data; + + // std::cout << "jsi = " << jsi << ", jti = " << jti << ", ann.data[jti, 2] = " << ann.keypoints[jti * 3 + 2] << '\n'; + if (ann.keypoints[jti * 3 + 2] > 0.0) + continue; + + auto [nx, ny, ns, nv] = new_xysv.value(); + ann.keypoints[jti * 3 + 0] = nx; + ann.keypoints[jti * 3 + 1] = ny; + ann.keypoints[jti * 3 + 2] = nv; + ann.jointScales[jti] = ns; + add_to_frontier(jti); + } + } + + std::vector OpenPifPafPostprocessor::softNMS(std::vector& annotations) + { + float maxx = 0.0f; + float maxy = 0.0f; + for (auto& ann : annotations) { + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + auto x = ann.keypoints[k * 3]; + auto y = ann.keypoints[k * 3 + 1]; + if (x > maxx) { + maxx = x; + } + if (y > maxy) { + maxy = y; + } + } + } + + const auto h = (int)(maxy + 1); + const auto w = (int)(maxx + 1); + Occupancy occupied(17, h, w); + + std::vector sorted(annotations.size()); + std::iota(sorted.begin(), sorted.end(), 0); + std::sort(sorted.begin(), sorted.end(), [annotations](int const& a, int const& b) { + return annotations[a].score() > annotations[b].score(); + }); + + for (auto a : sorted) { + Annotation& ann = annotations[a]; + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + const auto x = ann.keypoints[k * 3]; + const auto y = ann.keypoints[k * 3 + 1]; + const auto v = ann.keypoints[k * 3 + 2]; + if (v == 0) { + continue; + } + + const auto i = std::min(std::max(0, (int)std::round(x)), w - 1); + const auto j = std::min(std::max(0, (int)std::round(y)), h - 1); + + if (occupied.fuzz_get(k, j, i)) { + ann.keypoints[k * 3 + 2] = 0.0f; + } else { + scalarSquareAddSingle(occupied, k, h, w, x, y, ann.jointScales[k]); + } + } + } + + std::vector filtered; + for (auto& ann : annotations) { + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + if (ann.keypoints[k * 3 + 2] > 0.0f) { + filtered.push_back(ann); + break; + } + } + } + return filtered; + + // Note: The original code sorts here on the score 
(descending), but + // we sort again later on so it's a bit quicker if we skip that here. + } + + void OpenPifPafPostprocessor::initTensors(int tensorWidth, int tensorHeight) + { + H = tensorHeight; + W = tensorWidth; + H_hr = (H - 1) * (int)STRIDE + 1; + W_hr = (W - 1) * (int)STRIDE + 1; + + pif_stride_1 = H * W; + pif_stride_0 = 5 * pif_stride_1; + + pifhr_stride_1 = W_hr; + pifhr_stride_0 = H_hr * pifhr_stride_1; + + const int shape = C * H_hr * W_hr; + targetsCoreOnly = std::vector(shape); + targets = std::vector(shape); + scales = std::vector(shape); + ns = std::vector(shape); + } + + ai_app::Object_detection::Result OpenPifPafPostprocessor::postprocess( + int inputWidth, int inputHeight, + int tensorWidth, int tensorHeight, + const std::vector& pif, + const std::vector& paf) + { + // Allocate the intermediate tensors the first time or when the size changes. + if (W != tensorWidth || H != tensorHeight) { + initTensors(tensorWidth, tensorHeight); + } + + const auto result_tuple = targetIntensities(pif); + const auto& pifhr = std::get<0>(result_tuple); + const auto& pifhrScales = std::get<1>(result_tuple); + const auto& pifhrCore = std::get<2>(result_tuple); + + // (17, 5, H, W) + // pif: [v, x, y, _, s] + const size_t pif_ch = 5, hw_size = H * W; + const size_t pif_shard_size = pif_ch * hw_size; + + // BEGIN: seeds = utils.CifSeeds(cifhr.accumulated).fill(fields, self.cif_metas) + std::vector> seeds{}; + + const float maxx = W_hr - 0.51, maxy = H_hr - 0.51; + for (size_t field_i = 0; field_i < N_PIFPAF_KEYPOINTS; ++field_i) { + // Search qualified entries. + size_t this_field_offset = field_i * pif_shard_size; + for (size_t hw_index = 0; hw_index < hw_size; ++hw_index) { + size_t vindex = hw_index + this_field_offset; + if (pif[vindex] > seedThreshold) { + float c = pif[vindex], x = pif[vindex + hw_size], y = pif[vindex + 2 * hw_size], s = pif[vindex + 4 * hw_size]; + // scalar_values + if (x < -0.49 || y < -0.49 || x > maxx || y > maxy) { + continue; + } + float v = pifhrCore[field_i * W_hr * H_hr + ((size_t)(y * STRIDE + 0.5) * W_hr) + (size_t)(x * STRIDE + 0.5)]; + // scalar_values :: over. + + v = 0.9 * v + 0.1 * c; + // printf("%f %f, %f, %f, %f\n", v, c, x, y, s); + + // pass or not? + if (v > seedThreshold) { + // ok, you pass. -> seeds -> [x, y, v, s] + seeds.emplace_back(v, field_i, x * STRIDE, y * STRIDE, s * STRIDE); + } + } + } + } + // std::cout << seeds.size() << "seeds size\n"; + // END: seeds = utils.CifSeeds(cifhr.accumulated).fill(fields, self.cif_metas) + + // BEGIN: caf_scored = utils.CafScored(cifhr.accumulated).fill(fields, self.caf_metas) + // (19, 9, DYNAMICs) + constexpr size_t paf_ch = 9; + const size_t paf_shard_size = paf_ch * hw_size; + // (19, 9, H, W)... + FBContainer forward{}, backward{}; + for (size_t field_i = 0; field_i < forward.size(); ++field_i) { + constexpr float PAF_SCORE_THRE = 0.2; + constexpr float CIF_FLOOR = 0.1; + // filter! + for (size_t hw_idx = 0; hw_idx < hw_size; ++hw_idx) { + const size_t paf_conf_idx = hw_idx + field_i * paf_shard_size; + const auto conf = paf[paf_conf_idx]; + if (conf > PAF_SCORE_THRE) { + // values in this line... + std::array this_ch{}; + for (size_t chidx = 0; chidx < this_ch.size(); ++chidx) { + this_ch[chidx] = paf[paf_conf_idx + chidx * hw_size]; + if (chidx != 0) + this_ch[chidx] *= STRIDE; + } + + auto backward_pif_ch = bones[field_i][0] - 1; + auto forward_pif_ch = bones[field_i][1] - 1; + // backward pass. 
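+                    // The 9 CAF channels are [conf, x1, y1, x2, y2, b1, b2, s1, s2].
+                    // FORWARD_IDX keeps that order; BACKWARD_IDX swaps the two
+                    // endpoints (x1,y1 <-> x2,y2, b1 <-> b2, s1 <-> s2) so the same
+                    // association can also be scored from the opposite joint.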
+ constexpr std::array BACKWARD_IDX{ 0, 3, 4, 1, 2, 6, 5, 8, 7 }; + constexpr std::array FORWARD_IDX{ 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + + // restore... (yet another filtering...) + // cifhr_t = scalar_values(self.cifhr[joint_t], nine[3], nine[4], default=0.0) + // nine[0] = nine[0] * (self.cif_floor + (1.0 - self.cif_floor) * cifhr_t) + const auto pass = [&this_ch, maxx, maxy, this, field_i, &pifhrCore](const auto& idx_mapping, FBContainer& cont, size_t pif_field_idx) { + float x = this_ch[idx_mapping[3]], y = this_ch[idx_mapping[4]]; + if (!(x < -0.49 || y < -0.49 || x > maxx || y > maxy)) { + // std::cout << field_i << "\tXY = \t"<< x << '\t' << y << '\t' << (size_t)(x + 0.5) << '\t' << (size_t)(y + 0.5) << "\t MAX HW: " << W_hr << ' ' << H_hr << std::endl; + float cifhr_t = pifhrCore[pif_field_idx * W_hr * H_hr + ((size_t)(y + 0.5) * W_hr) + (size_t)(x + 0.5)]; + float new_v = this_ch[0] * (CIF_FLOOR + (1 - CIF_FLOOR) * cifhr_t); + if (new_v > PAF_SCORE_THRE) { + // forward pass. + for (size_t fwd_idx = 0; fwd_idx < cont.front().size(); ++fwd_idx) { + // restore! + cont[field_i][fwd_idx].push_back(this_ch[idx_mapping[fwd_idx]]); + } + cont[field_i][0].back() = new_v; + } + } + }; + + pass(BACKWARD_IDX, backward, backward_pif_ch); + pass(FORWARD_IDX, forward, forward_pif_ch); + } + } + } + // for (const auto& f : forward) { + // std::cout << "(" << f.size() << ", " << f.front().size() << "), "; + // } + // std::cout << '\n'; + // for (const auto& f : backward) { + // std::cout << "(" << f.size() << ", " << f.front().size() << "), "; + // } + // std::cout << '\n'; + // END: caf_scored = utils.CafScored(cifhr.accumulated).fill(fields, self.caf_metas) + std::sort(seeds.begin(), seeds.end(), std::greater{}); + + // occupacy map. + // std::cout << C << ' ' << H_hr << ' ' << W_hr << '\n'; + Occupancy occupied(C, H_hr, W_hr); + std::vector annotations; + for (const auto& [v, f, x, y, s] : seeds) { + if (occupied.fuzz_get(f, y, x)) { + continue; + } + + Annotation ann(f, x, y, v); + ann.jointScales[f] = s; + grow(ann, forward, backward); + annotations.push_back(ann); + + for (int i = 0; i < N_PIFPAF_KEYPOINTS; ++i) { + const auto ax = ann.keypoints[i * 3]; + const auto ay = ann.keypoints[i * 3 + 1]; + const auto av = ann.keypoints[i * 3 + 2]; + if (av == 0) { + continue; + } + + const auto width = ann.jointScales[i]; + scalarSquareAddSingle(occupied, i, H_hr, W_hr, ax, ay, width, Occupancy::reduction, Occupancy::min_scale_reduced); // width is sigma... + } + } + + // This returns two lists that each contain 19 tensors of shape (7, ?) + // where the second dimension can vary in size (depends on thresholds). 
+ // const auto pt = scorePafTarget(paf, pifhr); + // const auto pafForward = std::get<0>(pt); + // const auto pafBackward = std::get<1>(pt); + + /* + // For debugging + printf("pafForward:\n"); + for (auto& i : pafForward) { + for (auto j : i) { printf("%f, ", j); } printf("\n"); + } + printf("\npafBackward:\n"); + for (auto i : pafBackward) { + for (auto& j : i) { printf("%f, ", j); } printf("\n"); + } + */ + + // auto annotations = decodeAnnotations(seeds, pifhr, pifhrScales, pifhrCore, pafForward, pafBackward); + + // Scale to input size + // for (auto& ann : annotations) { + // for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + // ann.keypoints[k*3 ] *= STRIDE; + // ann.keypoints[k*3 + 1] *= STRIDE; + // std::cout << "--> Scaled: " < thresholded; + for (auto& ann : annotations) { + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + if (ann.keypoints[k * 3 + 2] < keypointThreshold) { + ann.keypoints[k * 3 + 2] = 0.0f; + } + } + if (ann.score() >= instanceThreshold) { + thresholded.push_back(ann); + } + } + + std::sort(thresholded.begin(), thresholded.end(), [](const Annotation& a, const Annotation& b) { + return a.score() > b.score(); + }); + + // // Convert to normalized coordinates + // for (auto& ann : thresholded) { + // for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + // ann.keypoints[k*3 ] /= inputWidth; + // ann.keypoints[k*3 + 1] /= inputHeight; + // } + // } + + /* + // For debugging + for (auto ann : thresholded) { + printf("Keypoints:\n"); + for (auto k : ann.keypoints) { + printf("%f, ", k); + } + printf("\nJoint scales:\n"); + for (auto k : ann.jointScales) { + printf("%f, ", k); + } + printf("\n"); + } + */ + + ai_app::Object_detection::Result result; + result.success = true; + for (auto& ann : thresholded) { + ai_app::Landmarks landmarks; + landmarks.type = "body_pose_pifpaf"; + + int minx = std::numeric_limits::max(), + miny = std::numeric_limits::max(), + maxx_ = std::numeric_limits::min(), + maxy_ = std::numeric_limits::min(); + + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + const int x = ann.keypoints[k * 3]; + const int y = ann.keypoints[k * 3 + 1]; + const auto v = ann.keypoints[k * 3 + 2]; + + if (v > 0.0f) { + if (x < minx) { + minx = x; + } + if (x > maxx_) { + maxx_ = x; + } + if (y < miny) { + miny = y; + } + if (y > maxy_) { + maxy_ = y; + } + } + + ai_app::Landmark landmark{}; + landmark.confidence = v; + landmark.position.x = x; + landmark.position.y = y; + landmarks.points.push_back(landmark); + } + + ai_app::Object_detection::Result::Item item; + item.confidence = ann.score(); + item.class_index = 1; + item.bounding_box.origin.x = minx; + item.bounding_box.origin.y = miny; + item.bounding_box.size.x = maxx_ - minx; + item.bounding_box.size.y = maxy_ - miny; + item.landmarks = landmarks; + + result.items.push_back(item); + } + return result; + } + +} +} diff --git a/src/pifpaf_decoder/openpifpaf_postprocessor.hpp b/src/pifpaf_decoder/openpifpaf_postprocessor.hpp new file mode 100644 index 00000000..6aa28353 --- /dev/null +++ b/src/pifpaf_decoder/openpifpaf_postprocessor.hpp @@ -0,0 +1,124 @@ +#pragma once + +#include +#include +#include +#include + +#include "object_detection.hpp" + +namespace lpdnn { +namespace aiapp_impl { + + using FBContainer = std::array, 9>, 19>; + + /** + Post-processing logic for OpenPifPaf + + \note This object caches the big tensors to save on memory allocations. + This means it's best to make one instance of this class and keep using it. 
+ For the most efficient results, make sure the input tensors are always the + same width and height. + + \note This code is not threadsafe. Don't call it from multiple threads at + the same time. If you must use multiple threads, give each thread its own + instance of this class. + */ + class OpenPifPafPostprocessor { + public: + OpenPifPafPostprocessor() + : H(0) + , W(0) + { + } + + public: + static constexpr int N_PIFPAF_KEYPOINTS = 17; + static constexpr int N_PIFPAF_BONES = 19; + + // Connections between the different keypoint indices. + // Note: these start at 1, not 0! + static const int bones[19][2]; + float keypointThreshold; + + ai_app::Object_detection::Result postprocess( + int inputWidth, int inputHeight, + int tensorWidth, int tensorHeight, + const std::vector& pif, + const std::vector& paf); + + private: + struct Annotation { + // Array of `N_PIFPAF_KEYPOINTS * 3` elements: + // - element `i*3 + 0` is x-coordinate (normalized) + // - element `i*3 + 1` is y-coordinate (normalized) + // - element `i*3 + 2` is confidence score + std::vector keypoints; + + std::vector jointScales; + + Annotation(int j, float x, float y, float v) + : keypoints(N_PIFPAF_KEYPOINTS * 3) + , jointScales(N_PIFPAF_KEYPOINTS) + { + keypoints[j * 3] = x; + keypoints[j * 3 + 1] = y; + keypoints[j * 3 + 2] = v; + } + + /** + Overall confidence score for the entire skeleton. + */ + float score() const + { + float maxv = 0.0f; + float vv = 0.0f; + for (int k = 0; k < N_PIFPAF_KEYPOINTS; ++k) { + auto v = keypoints[k * 3 + 2]; + if (v > maxv) { + maxv = v; + } + vv += v * v; + } + return 0.1f * maxv + 0.9f * vv / (float)N_PIFPAF_KEYPOINTS; + } + }; + + typedef std::tuple, std::vector, std::vector> Target_intensity; + + private: + void initTensors(int tensorWidth, int tensorHeight); + + Target_intensity + targetIntensities(const std::vector& pif, + float v_th = 0.1f, + bool coreOnly = false); + + std::tuple + growConnectionBlend(float x, float y, float s, const std::array, 9>& paf_field_); + + // frontier_t frontierIter(Annotation& ann); + + void grow(Annotation& ann, + const FBContainer& pafForward, + const FBContainer& pafBackward); + + std::vector softNMS(std::vector& annotations); + + private: + // Tensor dimensions (hr = high-resolution). + int H, W, H_hr, W_hr; + + // Strides for tensor dimensions. + size_t pif_stride_1, pif_stride_0; + size_t pifhr_stride_1, pifhr_stride_0; + + // Filled in by targetIntensities(). + std::vector targetsCoreOnly; + std::vector targets; + std::vector scales; + std::vector ns; + }; + +} +}
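For quick reference, the operator-API example added above reduces to the following usage sketch of the new `hp::parser::pifpaf` parser. The model path and the 427 x 640 input size are the defaults assumed by that example; the input image path is a hypothetical placeholder.

```cpp
#include <hyperpose/hyperpose.hpp>
#include <opencv2/opencv.hpp>
#include <vector>

int main()
{
    namespace hp = hyperpose;

    // Hypothetical input image; any image under data/media works.
    cv::Mat image = cv::imread("../data/media/example.jpg");
    std::vector<cv::Mat> batch{ image };

    // Build a TensorRT engine from the OpenPifPaf ONNX model (W x H = 427 x 640).
    hp::dnn::tensorrt engine(
        hp::dnn::onnx{ "../data/models/openpifpaf-resnet50.onnx" },
        { 427, 640 }, batch.size());

    // The parser only needs the network input size; use one instance per thread,
    // since the underlying decoder is not thread-safe.
    hp::parser::pifpaf parser(engine.input_size().height, engine.input_size().width);

    // Each packet carries the PIF and PAF feature maps for one input image.
    auto packets = engine.inference(batch);
    for (auto&& packet : packets) {
        auto humans = parser.process(packet[0], packet[1]);
        for (auto&& human : humans)
            hp::draw_human(image, human);
    }
    cv::imwrite("pifpaf_output.png", image);
}
```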