diff --git a/README.md b/README.md
index 2db67570..7806a2d4 100644
--- a/README.md
+++ b/README.md
@@ -80,6 +80,7 @@ We compare the prediction performance of HyperPose with [OpenPose 1.6](https://g
| OpenPose (TinyVGG) | 34.7 MB | 384 x 256 | **124.925 FPS** | N/A |
| OpenPose (MobileNet) | 17.9 MB | 432 x 368 | **84.32 FPS** | 8.5 FPS (TF-Pose) |
| OpenPose (ResNet18) | 45.0 MB | 432 x 368 | **62.52 FPS** | N/A |
+| OpenPifPaf (ResNet50) | 97.6 MB | 97 x 129 | **178.6 FPS** | 35.3 FPS (OpenPifPaf) |
diff --git a/cmake/hyperpose.cmake b/cmake/hyperpose.cmake
index 6ebb96b5..28131965 100644
--- a/cmake/hyperpose.cmake
+++ b/cmake/hyperpose.cmake
@@ -5,10 +5,15 @@ set(POSE_LIB_NAME hyperpose)
INCLUDE(cmake/cuda.cmake)
FIND_PACKAGE(OpenCV REQUIRED)
+FILE(GLOB PIFPAF_DECODER
+ src/pifpaf_decoder/*.cpp)
+
ADD_LIBRARY(
${POSE_LIB_NAME} # SHARED
src/logging.cpp
src/tensorrt.cpp
+ src/pifpaf.cpp
+ ${PIFPAF_DECODER}
src/paf.cpp
src/data.cpp
src/stream.cpp
diff --git a/cmake/hyperpose.fake.cmake b/cmake/hyperpose.fake.cmake
index 07ae7c1c..7aa1ce0c 100644
--- a/cmake/hyperpose.fake.cmake
+++ b/cmake/hyperpose.fake.cmake
@@ -12,7 +12,11 @@ ADD_LIBRARY(
src/stream.cpp
src/thread_pool.cpp
src/pose_proposal.cpp
- src/human.cpp)
+ src/human.cpp
+ src/pifpaf.cpp
+ src/pifpaf_decoder/math_helpers.cpp
+ src/pifpaf_decoder/openpifpaf_postprocessor.cpp
+)
TARGET_LINK_LIBRARIES(
${POSE_LIB_NAME}
diff --git a/examples/cli.cpp b/examples/cli.cpp
index 80a54aa4..dc4dd9f5 100644
--- a/examples/cli.cpp
+++ b/examples/cli.cpp
@@ -9,16 +9,17 @@
#define kSTREAM "stream"
#define kPAF "paf"
#define kPPN "ppn"
+#define kPIFPAF "pifpaf"
// Model Configuration.
DEFINE_string(model, "../data/models/TinyVGG-V1-HW=256x384.uff", "Path to the model.");
DEFINE_string(
post,
kPAF,
- "Post-processing method. (`" kPAF "` -> [Part Affine Field] or `" kPPN "` -> [Pose Proposal Network])");
+    "Post-processing method. (`" kPAF "` -> [Part Affinity Field], `" kPPN "` -> [Pose Proposal Network], or `" kPIFPAF "` -> [PifPaf])");
DEFINE_int32(w, 384, "Width of input image.");
DEFINE_int32(h, 256, "Height of input image.");
-DEFINE_int32(max_batch_size, 8, "Max batch size for inference engine to execute.");
+DEFINE_int32(max_batch_size, 4, "Max batch size for inference engine to execute.");
// Execution Mode
DEFINE_bool(imshow, true, "Whether to open an imshow window.");
@@ -37,18 +38,19 @@ namespace hp = hyperpose;
class parser_variant {
public:
+    using var_t = std::variant<hp::parser::paf, hp::parser::pose_proposal, hp::parser::pifpaf>;
     template <typename Container>
     std::vector<hp::human_t> process(Container&& feature_map_containers)
{
return std::visit([&feature_map_containers](auto& arg) { return arg.process(feature_map_containers); }, m_parser);
}
-    parser_variant(std::variant<hp::parser::paf, hp::parser::pose_proposal> v)
+ parser_variant(var_t v)
: m_parser(std::move(v))
{
}
private:
-    std::variant<hp::parser::paf, hp::parser::pose_proposal> m_parser;
+ var_t m_parser;
};
//parser_variant parser{parser};
@@ -142,14 +144,17 @@ int main(int argc, char** argv)
}();
cli_log() << "DNN engine is built.\n";
-    auto parser = parser_variant{ [&engine]() -> std::variant<hp::parser::paf, hp::parser::pose_proposal> {
+ auto parser = parser_variant{ [&engine]() -> parser_variant::var_t {
if (FLAGS_post == kPAF)
return hp::parser::paf{};
if (FLAGS_post == kPPN)
return hp::parser::pose_proposal(engine.input_size());
- cli_log() << "ERROR: Unknown post-processing flag: `" << FLAGS_post << "`. Use `paf` or `ppn` please.\n";
+ if (FLAGS_post == kPIFPAF)
+ return hp::parser::pifpaf(engine.input_size().height, engine.input_size().width);
+
+ cli_log() << "ERROR: Unknown post-processing flag: `" << FLAGS_post << "`. Use `paf`, `ppn` or `pifpaf` please.\n";
std::exit(-1);
}() };
@@ -179,6 +184,7 @@ int main(int argc, char** argv)
if (FLAGS_runtime == kOPERATOR) {
if (images.empty()) { // For CAP.
+ auto beg = clk_t::now();
auto writer = make_writer();
while (cap.isOpened()) {
cv::Mat mat;
@@ -222,6 +228,9 @@ int main(int argc, char** argv)
break;
}
}
+        auto inference_time = std::chrono::duration<double, std::milli>(clk_t::now() - beg).count();
+ std::cout << cap.get(cv::CAP_PROP_FRAME_COUNT) << " images got processed in " << inference_time << " ms, FPS = "
+ << 1000. * cap.get(cv::CAP_PROP_FRAME_COUNT) / inference_time << '\n';
} else { // For Vec.
auto beg = clk_t::now();
// * TensorRT Inference.
diff --git a/examples/gen_serialized_engine.example.cpp b/examples/gen_serialized_engine.example.cpp
index f172a930..3f09dd98 100644
--- a/examples/gen_serialized_engine.example.cpp
+++ b/examples/gen_serialized_engine.example.cpp
@@ -12,7 +12,7 @@ DEFINE_string(output_name_list, "outputs/conf,outputs/paf", "The output node nam
DEFINE_int32(input_height, 256, "Height of input image.");
DEFINE_int32(input_width, 384, "Width of input image.");
-DEFINE_int32(max_batch_size, 32, "The max batch size for the exported serialized model.");
+DEFINE_int32(max_batch_size, 1, "The max batch size for the exported serialized model.");
DEFINE_string(output_model, "", "Path to output serialized model.");
diff --git a/examples/operator_api_batched_images_pifpaf.example.cpp b/examples/operator_api_batched_images_pifpaf.example.cpp
new file mode 100644
index 00000000..5f57fa11
--- /dev/null
+++ b/examples/operator_api_batched_images_pifpaf.example.cpp
@@ -0,0 +1,77 @@
+#include "utils.hpp"
+#include <chrono>
+#include <gflags/gflags.h>
+#include <hyperpose/hyperpose.hpp>
+
+// Model flags
+DEFINE_string(model_file, "../data/models/openpifpaf-resnet50.onnx", "Path to the model.");
+
+DEFINE_bool(logging, false, "Print the logging information or not.");
+DEFINE_int32(input_height, 640, "Height of input image.");
+DEFINE_int32(input_width, 427, "Width of input image.");
+
+DEFINE_string(input_folder, "../data/media", "Folder of images to inference.");
+
+int main(int argc, char** argv)
+{
+ gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+ // * Collect data into batch.
+    std::vector<cv::Mat> batch = glob_images(FLAGS_input_folder);
+
+ if (batch.empty()) {
+ example_log() << "No input images got. Exiting.\n";
+ exit(-1);
+ }
+
+ example_log() << "Batch shape: [" << batch.size() << ", 3, " << FLAGS_input_height << ", " << FLAGS_input_width << "]\n";
+
+ // * Create TensorRT engine.
+ namespace hp = hyperpose;
+ if (FLAGS_logging)
+ hp::enable_logging();
+
+ auto engine = [&] {
+ using namespace hp::dnn;
+ constexpr std::string_view onnx_suffix = ".onnx";
+ constexpr std::string_view uff_suffix = ".uff";
+
+ if (std::equal(onnx_suffix.crbegin(), onnx_suffix.crend(), FLAGS_model_file.crbegin()))
+ return tensorrt(onnx{ FLAGS_model_file }, { FLAGS_input_width, FLAGS_input_height }, batch.size());
+
+ example_log() << "Your model file's suffix is not [.onnx | .uff]. Your model file path: " << FLAGS_model_file;
+ example_log() << "Trying to be viewed as a serialized TensorRT model.";
+
+ return tensorrt(tensorrt_serialized{ FLAGS_model_file }, { FLAGS_input_width, FLAGS_input_height }, batch.size());
+ }();
+
+ hp::parser::pifpaf parser(engine.input_size().height, engine.input_size().width);
+
+ using clk_t = std::chrono::high_resolution_clock;
+ auto beg = clk_t::now();
+ {
+ // * TensorRT Inference.
+ auto feature_map_packets = engine.inference(batch);
+ for (const auto& packet : feature_map_packets)
+ for (const auto& feature_map : packet)
+ example_log() << feature_map << std::endl;
+
+ // * Paf.
+        std::vector<std::vector<hp::human_t>> pose_vectors;
+ pose_vectors.reserve(feature_map_packets.size());
+ for (auto&& packet : feature_map_packets) {
+ pose_vectors.push_back(parser.process(packet[0], packet[1]));
+ }
+
+ std::cout << batch.size() << " images got processed. FPS = "
+                  << 1000. * batch.size() / std::chrono::duration<double, std::milli>(clk_t::now() - beg).count()
+ << '\n';
+
+ for (size_t i = 0; i < batch.size(); ++i) {
+ cv::resize(batch[i], batch[i], { FLAGS_input_width, FLAGS_input_height });
+ for (auto&& pose : pose_vectors[i])
+ hp::draw_human(batch[i], pose);
+ cv::imwrite("output_" + std::to_string(i) + ".png", batch[i]);
+ }
+ }
+}
\ No newline at end of file
diff --git a/include/hyperpose/hyperpose.hpp b/include/hyperpose/hyperpose.hpp
index a851c758..b9df8fcb 100644
--- a/include/hyperpose/hyperpose.hpp
+++ b/include/hyperpose/hyperpose.hpp
@@ -9,6 +9,7 @@
#include "operator/dnn/tensorrt.hpp"
#include "operator/parser/paf.hpp"
+#include "operator/parser/pifpaf.hpp"
#include "operator/parser/proposal_network.hpp"
#include "stream/stream.hpp"
\ No newline at end of file
diff --git a/include/hyperpose/operator/parser/pifpaf.hpp b/include/hyperpose/operator/parser/pifpaf.hpp
new file mode 100644
index 00000000..41eb83de
--- /dev/null
+++ b/include/hyperpose/operator/parser/pifpaf.hpp
@@ -0,0 +1,28 @@
+#pragma once
+
+#include "../../utility/data.hpp"
+#include "paf.hpp"
+
+namespace hyperpose::parser {
+
+class pifpaf {
+public:
+ inline explicit pifpaf(int h, int w, float thresh = 0.1)
+ : m_net_h(h)
+ , m_net_w(w)
+ , m_keypoint_thresh(thresh){};
+    std::vector<human_t> process(const feature_map_t& pif, const feature_map_t& paf);
+    template <typename C>
+    std::vector<human_t> process(C&& feature_map_containers)
+ {
+ // 1@pif, 2@paf.
+ assert(feature_map_containers.size() == 2);
+ return process(feature_map_containers[0], feature_map_containers[1]);
+ }
+
+private:
+ int m_net_w, m_net_h;
+ float m_keypoint_thresh;
+};
+
+} // namespace hyperpose::parser
\ No newline at end of file
diff --git a/src/human.cpp b/src/human.cpp
index dfc46893..7473a116 100644
--- a/src/human.cpp
+++ b/src/human.cpp
@@ -7,7 +7,7 @@ namespace hyperpose {
void draw_human(cv::Mat& img, const human_t& human)
{
float n = 1, s = 0, w = 1, e = 0;
- for(const auto& p : human.parts)
+ for (const auto& p : human.parts)
if (p.has_value) {
n = std::min(n, p.y);
s = std::max(s, p.y);
diff --git a/src/pifpaf.cpp b/src/pifpaf.cpp
new file mode 100644
index 00000000..4bfc7064
--- /dev/null
+++ b/src/pifpaf.cpp
@@ -0,0 +1,97 @@
+#include "pifpaf_decoder/openpifpaf_postprocessor.hpp"
+#include <hyperpose/hyperpose.hpp>
+
+namespace hyperpose::parser {
+
+// TODO(review): parameter names here are (paf, pif) but the declaration in pifpaf.hpp is (pif, paf) -- confirm which tensor order callers actually pass.
+std::vector<human_t> pifpaf::process(const feature_map_t& paf, const feature_map_t& pif)
+{
+    // Helpful links (in Chinese):
+ // https://zhuanlan.zhihu.com/p/93896207
+ // https://zhuanlan.zhihu.com/p/68073113
+ // pif: [17, 5, h, w] => KEY POINTS;
+ // 5: [conf, dx, dy, b, scale]
+ // Example: array([ 0.00527313, 0.13620843, -0.32253477, 0.3263721 , 0.90980804], dtype=float32)
+ // heat map: f(x, y) = \sum_ij conf * N(x, y|ij)
+ // paf: [19, 9, h, w] => LIMBS;
+ // 9: [conf, [x1, y1, x2, y2], [b1, b2], [s1, s2]]
+ // Example: [ 0.00712654, -0.54057586, 5.4075847 , 3.0354404 , 3.1246614 , 1.0621283 , -3.5857565 , 2.6072054 , 3.8406293 ],
+ // TODO: OPTIMIZE THIS.
+
+ lpdnn::aiapp_impl::OpenPifPafPostprocessor pp;
+ pp.keypointThreshold = m_keypoint_thresh;
+ size_t h = pif.shape()[pif.shape().size() - 2];
+ size_t w = pif.shape().back();
+
+    std::vector<float> pif_vec{}, paf_vec{};
+
+    const auto raw_copy = [](const feature_map_t& tensor, std::vector<float>& vec) {
+ size_t d0 = tensor.shape()[0];
+ size_t d1 = tensor.shape()[1];
+ size_t h = tensor.shape()[2];
+ size_t w = tensor.shape()[3];
+ const size_t total_size = d0 * d1 * h * w;
+ vec.reserve(total_size);
+ for (size_t i = 0; i < total_size; ++i) {
+ vec.push_back(tensor.view()[i]);
+ }
+ };
+
+ raw_copy(pif, pif_vec);
+ raw_copy(paf, paf_vec);
+
+ // TODO: RECOVER THE INP{W, H};
+ auto apires = pp.postprocess(m_net_w, m_net_h, w, h, pif_vec, paf_vec);
+
+    std::vector<human_t> ret{};
+ ret.reserve(apires.items.size());
+ // OpenPifPaf COCO Topology: https://miro.medium.com/max/366/0*KFrFQVj3OoGAtt6o.png
+ // HyperPose: Unified Topology
+ // NOTE: This step is to convert pifpaf topology to hyperpose topology.
+
+ for (auto&& item : apires.items) {
+ if (item.landmarks.points.empty())
+ continue;
+ human_t man{};
+ man.score = item.confidence;
+
+ auto p2p = [this](const auto& src, auto& dst) {
+ if (src.confidence > 0.) {
+ dst.score = 1; // src.confidence; FIXME
+ dst.x = src.position.x / (float)m_net_w;
+ dst.y = src.position.y / (float)m_net_h;
+ dst.has_value = true;
+ }
+ };
+
+ auto& from = item.landmarks.points;
+ auto& to = man.parts;
+ // OpenPifPaf -> HyperPose
+ p2p(from[0], to[0]);
+ // ! to [1]
+        constexpr std::array<std::size_t, 16> from_index = {
+ 6, 8, 10, 5, 7, 9,
+ 12, 14, 16, 11, 13, 15,
+ 2, 1, 4, 3
+ };
+
+ for (size_t i = 0; i < from_index.size(); ++i) {
+ p2p(from[from_index[i]], to[i + 2]);
+ }
+
+ if (to[2].has_value && to[5].has_value) {
+ to[1].x = (to[2].x + to[5].x) / 2;
+ ;
+ to[1].y = (to[2].y + to[5].y) / 2;
+ ;
+ to[1].has_value = true;
+ to[1].score = (to[2].score + to[5].score) / 2;
+ }
+
+ ret.push_back(man);
+ }
+
+ return ret;
+}
+
+} // namespace hyperpose::parser
\ No newline at end of file
diff --git a/src/pifpaf_decoder/aiapp.hpp b/src/pifpaf_decoder/aiapp.hpp
new file mode 100644
index 00000000..85c75a09
--- /dev/null
+++ b/src/pifpaf_decoder/aiapp.hpp
@@ -0,0 +1,118 @@
+///
+/// Ai-app base interface and types
+///
+/// \copyright 2018 NVISO SA. All rights reserved.
+/// \license This project is released under the XXXXXX License.
+///
+
+#pragma once
+
+#include
+#include