Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ We compare the prediction performance of HyperPose with [OpenPose 1.6](https://g
| OpenPose (TinyVGG) | 34.7 MB | 384 x 256 | **124.925 FPS** | N/A |
| OpenPose (MobileNet) | 17.9 MB | 432 x 368 | **84.32 FPS** | 8.5 FPS (TF-Pose) |
| OpenPose (ResNet18) | 45.0 MB | 432 x 368 | **62.52 FPS** | N/A |
| OpenPifPaf (ResNet50) | 97.6 MB | 97 x 129 | **178.6 FPS** | 35.3 FPS (OpenPifPaf) |

</a>
<p align="center">
Expand Down
5 changes: 5 additions & 0 deletions cmake/hyperpose.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,15 @@ set(POSE_LIB_NAME hyperpose)
INCLUDE(cmake/cuda.cmake)
FIND_PACKAGE(OpenCV REQUIRED)

FILE(GLOB PIFPAF_DECODER
src/pifpaf_decoder/*.cpp)

ADD_LIBRARY(
${POSE_LIB_NAME} # SHARED
src/logging.cpp
src/tensorrt.cpp
src/pifpaf.cpp
${PIFPAF_DECODER}
src/paf.cpp
src/data.cpp
src/stream.cpp
Expand Down
6 changes: 5 additions & 1 deletion cmake/hyperpose.fake.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ ADD_LIBRARY(
src/stream.cpp
src/thread_pool.cpp
src/pose_proposal.cpp
src/human.cpp)
src/human.cpp
src/pifpaf.cpp
src/pifpaf_decoder/math_helpers.cpp
src/pifpaf_decoder/openpifpaf_postprocessor.cpp
)

TARGET_LINK_LIBRARIES(
${POSE_LIB_NAME}
Expand Down
21 changes: 15 additions & 6 deletions examples/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,17 @@
#define kSTREAM "stream"
#define kPAF "paf"
#define kPPN "ppn"
#define kPIFPAF "pifpaf"

// Model Configuration.
DEFINE_string(model, "../data/models/TinyVGG-V1-HW=256x384.uff", "Path to the model.");
DEFINE_string(
post,
kPAF,
"Post-processing method. (`" kPAF "` -> [Part Affine Field] or `" kPPN "` -> [Pose Proposal Network])");
"Post-processing method. (`" kPAF "` -> [Part Affine Field] or `" kPPN "` -> [Pose Proposal Network]) or `" kPIFPAF "` -> [Pif Paf]");
DEFINE_int32(w, 384, "Width of input image.");
DEFINE_int32(h, 256, "Height of input image.");
DEFINE_int32(max_batch_size, 8, "Max batch size for inference engine to execute.");
DEFINE_int32(max_batch_size, 4, "Max batch size for inference engine to execute.");

// Execution Mode
DEFINE_bool(imshow, true, "Whether to open an imshow window.");
Expand All @@ -37,18 +38,19 @@ namespace hp = hyperpose;

class parser_variant {
public:
using var_t = std::variant<hp::parser::pose_proposal, hp::parser::paf, hp::parser::pifpaf>;
template <typename Container>
std::vector<hp::human_t> process(Container&& feature_map_containers)
{
return std::visit([&feature_map_containers](auto& arg) { return arg.process(feature_map_containers); }, m_parser);
}
parser_variant(std::variant<hp::parser::pose_proposal, hp::parser::paf> v)
parser_variant(var_t v)
: m_parser(std::move(v))
{
}

private:
std::variant<hp::parser::pose_proposal, hp::parser::paf> m_parser;
var_t m_parser;
};
//parser_variant parser{parser};

Expand Down Expand Up @@ -142,14 +144,17 @@ int main(int argc, char** argv)
}();
cli_log() << "DNN engine is built.\n";

auto parser = parser_variant{ [&engine]() -> std::variant<hp::parser::pose_proposal, hp::parser::paf> {
auto parser = parser_variant{ [&engine]() -> parser_variant::var_t {
if (FLAGS_post == kPAF)
return hp::parser::paf{};

if (FLAGS_post == kPPN)
return hp::parser::pose_proposal(engine.input_size());

cli_log() << "ERROR: Unknown post-processing flag: `" << FLAGS_post << "`. Use `paf` or `ppn` please.\n";
if (FLAGS_post == kPIFPAF)
return hp::parser::pifpaf(engine.input_size().height, engine.input_size().width);

cli_log() << "ERROR: Unknown post-processing flag: `" << FLAGS_post << "`. Use `paf`, `ppn` or `pifpaf` please.\n";
std::exit(-1);
}() };

Expand Down Expand Up @@ -179,6 +184,7 @@ int main(int argc, char** argv)
if (FLAGS_runtime == kOPERATOR) {
if (images.empty()) { // For CAP.

auto beg = clk_t::now();
auto writer = make_writer();
while (cap.isOpened()) {
cv::Mat mat;
Expand Down Expand Up @@ -222,6 +228,9 @@ int main(int argc, char** argv)
break;
}
}
auto inference_time = std::chrono::duration<double, std::milli>(clk_t::now() - beg).count();
std::cout << cap.get(cv::CAP_PROP_FRAME_COUNT) << " images got processed in " << inference_time << " ms, FPS = "
<< 1000. * cap.get(cv::CAP_PROP_FRAME_COUNT) / inference_time << '\n';
} else { // For Vec<Image>.
auto beg = clk_t::now();
// * TensorRT Inference.
Expand Down
2 changes: 1 addition & 1 deletion examples/gen_serialized_engine.example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ DEFINE_string(output_name_list, "outputs/conf,outputs/paf", "The output node nam

DEFINE_int32(input_height, 256, "Height of input image.");
DEFINE_int32(input_width, 384, "Width of input image.");
DEFINE_int32(max_batch_size, 32, "The max batch size for the exported serialized model.");
DEFINE_int32(max_batch_size, 1, "The max batch size for the exported serialized model.");

DEFINE_string(output_model, "", "Path to output serialized model.");

Expand Down
77 changes: 77 additions & 0 deletions examples/operator_api_batched_images_pifpaf.example.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#include "utils.hpp"
#include <gflags/gflags.h>
#include <hyperpose/hyperpose.hpp>
#include <string_view>

// Model flags
DEFINE_string(model_file, "../data/models/openpifpaf-resnet50.onnx", "Path to the model.");

DEFINE_bool(logging, false, "Print the logging information or not.");
DEFINE_int32(input_height, 640, "Height of input image.");
DEFINE_int32(input_width, 427, "Width of input image.");

DEFINE_string(input_folder, "../data/media", "Folder of images to inference.");

int main(int argc, char** argv)
{
gflags::ParseCommandLineFlags(&argc, &argv, true);

// * Collect data into batch.
std::vector<cv::Mat> batch = glob_images(FLAGS_input_folder);

if (batch.empty()) {
example_log() << "No input images got. Exiting.\n";
exit(-1);
}

example_log() << "Batch shape: [" << batch.size() << ", 3, " << FLAGS_input_height << ", " << FLAGS_input_width << "]\n";

// * Create TensorRT engine.
namespace hp = hyperpose;
if (FLAGS_logging)
hp::enable_logging();

auto engine = [&] {
using namespace hp::dnn;
constexpr std::string_view onnx_suffix = ".onnx";
constexpr std::string_view uff_suffix = ".uff";

if (std::equal(onnx_suffix.crbegin(), onnx_suffix.crend(), FLAGS_model_file.crbegin()))
return tensorrt(onnx{ FLAGS_model_file }, { FLAGS_input_width, FLAGS_input_height }, batch.size());

example_log() << "Your model file's suffix is not [.onnx | .uff]. Your model file path: " << FLAGS_model_file;
example_log() << "Trying to be viewed as a serialized TensorRT model.";

return tensorrt(tensorrt_serialized{ FLAGS_model_file }, { FLAGS_input_width, FLAGS_input_height }, batch.size());
}();

hp::parser::pifpaf parser(engine.input_size().height, engine.input_size().width);

using clk_t = std::chrono::high_resolution_clock;
auto beg = clk_t::now();
{
// * TensorRT Inference.
auto feature_map_packets = engine.inference(batch);
for (const auto& packet : feature_map_packets)
for (const auto& feature_map : packet)
example_log() << feature_map << std::endl;

// * Paf.
std::vector<std::vector<hp::human_t>> pose_vectors;
pose_vectors.reserve(feature_map_packets.size());
for (auto&& packet : feature_map_packets) {
pose_vectors.push_back(parser.process(packet[0], packet[1]));
}

std::cout << batch.size() << " images got processed. FPS = "
<< 1000. * batch.size() / std::chrono::duration<double, std::milli>(clk_t::now() - beg).count()
<< '\n';

for (size_t i = 0; i < batch.size(); ++i) {
cv::resize(batch[i], batch[i], { FLAGS_input_width, FLAGS_input_height });
for (auto&& pose : pose_vectors[i])
hp::draw_human(batch[i], pose);
cv::imwrite("output_" + std::to_string(i) + ".png", batch[i]);
}
}
}
1 change: 1 addition & 0 deletions include/hyperpose/hyperpose.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "operator/dnn/tensorrt.hpp"
#include "operator/parser/paf.hpp"
#include "operator/parser/pifpaf.hpp"
#include "operator/parser/proposal_network.hpp"

#include "stream/stream.hpp"
28 changes: 28 additions & 0 deletions include/hyperpose/operator/parser/pifpaf.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#pragma once

#include "../../utility/data.hpp"
#include "paf.hpp"

namespace hyperpose::parser {

/// \brief Post-processor that decodes OpenPifPaf network outputs
/// (a PIF and a PAF feature map) into a list of human poses.
class pifpaf {
public:
    /// \param h Network input height in pixels.
    /// \param w Network input width in pixels.
    /// \param thresh Keypoint confidence threshold used by the decoder.
    // Members are initialized in declaration order (m_net_w first) to
    // avoid a -Wreorder warning; each initializer is independent.
    inline explicit pifpaf(int h, int w, float thresh = 0.1)
        : m_net_w(w)
        , m_net_h(h)
        , m_keypoint_thresh(thresh)
    {
    }

    /// Decode one (PIF, PAF) feature-map pair into detected humans.
    std::vector<human_t> process(const feature_map_t& pif, const feature_map_t& paf);

    /// Convenience overload for a container of exactly two feature maps.
    // 1@pif, 2@paf.
    template <typename C>
    std::vector<human_t> process(C&& feature_map_containers)
    {
        assert(feature_map_containers.size() == 2);
        return process(feature_map_containers[0], feature_map_containers[1]);
    }

private:
    int m_net_w, m_net_h; // Network input width/height in pixels.
    float m_keypoint_thresh; // Minimum keypoint confidence.
};

} // namespace hyperpose::parser
2 changes: 1 addition & 1 deletion src/human.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ namespace hyperpose {
void draw_human(cv::Mat& img, const human_t& human)
{
float n = 1, s = 0, w = 1, e = 0;
for(const auto& p : human.parts)
for (const auto& p : human.parts)
if (p.has_value) {
n = std::min(n, p.y);
s = std::max(s, p.y);
Expand Down
97 changes: 97 additions & 0 deletions src/pifpaf.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#include "pifpaf_decoder/openpifpaf_postprocessor.hpp"
#include <hyperpose/operator/parser/pifpaf.hpp>

namespace hyperpose::parser {

// Decode one (PIF, PAF) feature-map pair produced by an OpenPifPaf network
// into a vector of human_t poses in HyperPose's unified topology.
//
// NOTE(review): the parameter names here are swapped relative to the
// declaration in pifpaf.hpp (`process(pif, paf)`): the caller's FIRST
// argument binds the local named `paf` and the second binds `pif`. Whether
// the data actually flows to the right postprocess() inputs depends on which
// tensor the engine emits first — confirm before renaming (see the author's
// TODO below).
// TODO: Name ORDER!
std::vector<human_t> pifpaf::process(const feature_map_t& paf, const feature_map_t& pif)
{
    // Helpful links (Chinese):
    // https://zhuanlan.zhihu.com/p/93896207
    // https://zhuanlan.zhihu.com/p/68073113
    // pif: [17, 5, h, w] => KEY POINTS;
    // 5: [conf, dx, dy, b, scale]
    // Example: array([ 0.00527313, 0.13620843, -0.32253477, 0.3263721 , 0.90980804], dtype=float32)
    // heat map: f(x, y) = \sum_ij conf * N(x, y|ij)
    // paf: [19, 9, h, w] => LIMBS;
    // 9: [conf, [x1, y1, x2, y2], [b1, b2], [s1, s2]]
    // Example: [ 0.00712654, -0.54057586, 5.4075847 , 3.0354404 , 3.1246614 , 1.0621283 , -3.5857565 , 2.6072054 , 3.8406293 ],
    // TODO: OPTIMIZE THIS.

    lpdnn::aiapp_impl::OpenPifPafPostprocessor pp;
    pp.keypointThreshold = m_keypoint_thresh;
    // Spatial size of the feature maps: last two dimensions of the tensor.
    size_t h = pif.shape()[pif.shape().size() - 2];
    size_t w = pif.shape().back();

    std::vector<float> pif_vec{}, paf_vec{};

    // Flatten a 4-D tensor (d0 x d1 x h x w) into a contiguous float vector,
    // element by element, in the tensor's native (row-major) order.
    const auto raw_copy = [](const feature_map_t& tensor, std::vector<float>& vec) {
        size_t d0 = tensor.shape()[0];
        size_t d1 = tensor.shape()[1];
        size_t h = tensor.shape()[2];
        size_t w = tensor.shape()[3];
        const size_t total_size = d0 * d1 * h * w;
        vec.reserve(total_size);
        for (size_t i = 0; i < total_size; ++i) {
            vec.push_back(tensor.view<float>()[i]);
        }
    };

    raw_copy(pif, pif_vec);
    raw_copy(paf, paf_vec);

    // Run the OpenPifPaf greedy decoder: (net input size, feature-map size,
    // flattened tensors) -> detected items with landmarks.
    // TODO: RECOVER THE INP{W, H};
    auto apires = pp.postprocess(m_net_w, m_net_h, w, h, pif_vec, paf_vec);

    std::vector<human_t> ret{};
    ret.reserve(apires.items.size());
    // OpenPifPaf COCO Topology: https://miro.medium.com/max/366/0*KFrFQVj3OoGAtt6o.png
    // HyperPose: Unified Topology
    // NOTE: This step is to convert pifpaf topology to hyperpose topology.

    for (auto&& item : apires.items) {
        if (item.landmarks.points.empty())
            continue;
        human_t man{};
        man.score = item.confidence;

        // Copy one landmark into a HyperPose body part, normalizing the
        // pixel position by the network input size.
        auto p2p = [this](const auto& src, auto& dst) {
            if (src.confidence > 0.) {
                dst.score = 1; // src.confidence; FIXME
                dst.x = src.position.x / (float)m_net_w;
                dst.y = src.position.y / (float)m_net_h;
                dst.has_value = true;
            }
        };

        auto& from = item.landmarks.points;
        auto& to = man.parts;
        // OpenPifPaf -> HyperPose
        // Part 0 (nose) maps directly.
        p2p(from[0], to[0]);
        // ! to [1]
        // Part 1 (neck) has no OpenPifPaf counterpart; it is synthesized
        // from the shoulders below. from_index[i] is the OpenPifPaf index
        // feeding HyperPose part i + 2.
        constexpr std::array<size_t, 16> from_index = {
            6, 8, 10, 5, 7, 9,
            12, 14, 16, 11, 13, 15,
            2, 1, 4, 3
        };

        for (size_t i = 0; i < from_index.size(); ++i) {
            p2p(from[from_index[i]], to[i + 2]);
        }

        // Synthesize the neck (part 1) as the midpoint of the two shoulders
        // (parts 2 and 5) when both were detected.
        if (to[2].has_value && to[5].has_value) {
            to[1].x = (to[2].x + to[5].x) / 2;
            ; // (stray empty statement — harmless)
            to[1].y = (to[2].y + to[5].y) / 2;
            ; // (stray empty statement — harmless)
            to[1].has_value = true;
            to[1].score = (to[2].score + to[5].score) / 2;
        }

        ret.push_back(man);
    }

    return ret;
}

} // namespace hyperpose::parser
Loading