From c9dfc79ca91d9b76ce151dfdea316e14e42b50f7 Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Tue, 15 Jul 2025 04:27:30 -0400 Subject: [PATCH 01/12] new interfaces to support decoder fallback, encoder avoids cuda and vaapi hard coding, improved flush --- README.md | 4 + include/ffmpeg_encoder_decoder/decoder.hpp | 83 ++++++- include/ffmpeg_encoder_decoder/encoder.hpp | 59 +++-- include/ffmpeg_encoder_decoder/utils.hpp | 25 +- src/decoder.cpp | 257 ++++++++++++++++----- src/encoder.cpp | 53 +++-- src/utils.cpp | 107 ++++++++- test/encoder_test.cpp | 3 +- 8 files changed, 475 insertions(+), 116 deletions(-) diff --git a/README.md b/README.md index 36d6b28..7104c07 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,10 @@ nvmpi like so: 'ffmpeg_image_transport.preset': 'll', 'ffmpeg_image_transport.gop_size': 15}] ``` +Sometimes the ffmpeg parameters show up under different names. If the above +settings don't work, try the command ``ros2 param dump `` +*after* subscribing to the ffmpeg image topic with e.g. ``ros2 topic hz``. +From the output you can see what the correct parameter names are. ## License This software is issued under the Apache License Version 2.0. diff --git a/include/ffmpeg_encoder_decoder/decoder.hpp b/include/ffmpeg_encoder_decoder/decoder.hpp index 0e2521c..727a7ea 100644 --- a/include/ffmpeg_encoder_decoder/decoder.hpp +++ b/include/ffmpeg_encoder_decoder/decoder.hpp @@ -44,27 +44,90 @@ class Decoder Decoder(); ~Decoder(); + /** + * Test if decoder is initialized. + * @return true if the decoder is initialized. + */ bool isInitialized() const { return (codecContext_ != NULL); } - // Initialize decoder upon first packet received, - // providing callback to be called when frame is complete. - // You must still call decodePacket(msg) afterward! - bool initialize(const std::string & encoding, Callback callback, const std::string & dec); - // clears all state, but leaves config intact + /** + * Initialize decoder, with multiple decoders to pick from. + * Will pick hardware accelerated decoders first if available. + * If decoders.empty() a default decoder will be chosen (if available). + * @param encoding the encoding from the first packet. Can never change! + * @param callback the function to call when frame has been decoded + * @param decoder the decoder to use. If empty string, + * the decoder will try to find a suitable one based on the encoding + * @return true if successful + */ + + bool initialize(const std::string & encoding, Callback callback, const std::string & decoder); + /** + * Initialize decoder with multiple decoders to pick from. + * Will pick hardware accelerated decoders first if available. + * If decoders.empty() a default decoder will be chosen (if available). + * @param encoding the encoding from the first packet. Can never change! + * @param callback the function to call when frame has been decoded + * @param decoders the set of decoders to try sequentially. If empty() + * the decoder will try to find a suitable one based on the encoding + * @return true if successful + */ + bool initialize( + const std::string & encoding, Callback callback, const std::vector & decoders); + /** + * Clears all decoder state but not timers, loggers, and other settings. + */ void reset(); - // decode packet (may result in frame callback!) + /** + * Decodes packet. Decoder must have been initialized beforehand. Calling this + * function may result in callback with decoded frame. 
+ * @param encoding the name of the encoding (typically from msg encoding) + * @param data pointer to packet data + * @param size size of packet data + * @param pts presentation time stamp of data packet + * @param frame_id ros frame id (from message header) + * @param stamp ros message header time stamp + */ bool decodePacket( const std::string & encoding, const uint8_t * data, size_t size, uint64_t pts, const std::string & frame_id, const rclcpp::Time & stamp); - + /** + * Override default logger + * @param logger the logger to override the default with + */ + void setLogger(rclcpp::Logger logger) { logger_ = logger; } + /** + * deprecated, don't use! + */ + static const std::unordered_map & getDefaultEncoderToDecoderMap(); + /** + * Finds the name of hardware and software decoders that match a + * certain encoding (or encoder) + */ + static void findDecoders( + const std::string & encoding, std::vector * hw_decoders, + std::vector * sw_decoders); + /** + * Finds the name of all hardware and software decoders that match + * a certain encoding (or encoder) + */ + static std::vector findDecoders(const std::string & encoding); + /** + * For performance debugging + */ void setMeasurePerformance(bool p) { measurePerformance_ = p; } + /** + * For performance debugging + */ void printTimers(const std::string & prefix) const; + /** + * For performance debugging + */ void resetTimers(); - void setLogger(rclcpp::Logger logger) { logger_ = logger; } - static const std::unordered_map & getDefaultEncoderToDecoderMap(); private: rclcpp::Logger logger_; - bool initDecoder(const std::string & encoding, const std::string & decoder); + bool initSingleDecoder(const std::string & decoder); + bool initDecoder(const std::vector & decoders); // --------------- variables Callback callback_; PTSMap ptsToStamp_; // mapping of header diff --git a/include/ffmpeg_encoder_decoder/encoder.hpp b/include/ffmpeg_encoder_decoder/encoder.hpp index a93fb41..3d6637b 100644 --- a/include/ffmpeg_encoder_decoder/encoder.hpp +++ b/include/ffmpeg_encoder_decoder/encoder.hpp @@ -50,11 +50,8 @@ class Encoder Encoder(); ~Encoder(); // ------- various encoding settings - void setEncoder(const std::string & n) - { - Lock lock(mutex_); - encoder_ = n; - } + void setEncoder(const std::string & n); + void setProfile(const std::string & p) { Lock lock(mutex_); @@ -105,6 +102,16 @@ class Encoder Lock lock(mutex_); GOPSize_ = g; } + int getMaxBFrames() const + { + Lock lock(mutex_); + return (maxBFrames_); + } + void setMaxBFrames(int b) + { + Lock lock(mutex_); + maxBFrames_ = b; + } void setFrameRate(int frames, int second) { Lock lock(mutex_); @@ -130,8 +137,22 @@ class Encoder // encode image void encodeImage(const cv::Mat & img, const Header & header, const rclcpp::Time & t0); void encodeImage(const Image & msg); - // flush all packets. Need header to generate callback message + /**! + * flush all packets (produces callbacks). + * \param frame_id the frame id that will be presented on callback + */ + void flush(const std::string & frame_id); + /**! + * flush all packets (produces callbacks). + * \deprecated Only header.frame_id is used. Used flush(frame_id) now. + */ void flush(const Header & header); + + /**! + * finds the encoding for a given encoder, i.e. 
returns h264 for h264_vaapi + */ + static std::string findEncoding(const std::string & encoder); + // ------- performance statistics void printTimers(const std::string & prefix) const; void resetTimers(); @@ -142,27 +163,31 @@ class Encoder bool openCodec(int width, int height); void doOpenCodec(int width, int height); void closeCodec(); - int drainPacket(const Header & hdr, int width, int height); + int drainPacket(const std::string & frame_id, int width, int height); AVPixelFormat pixelFormat(const std::string & f) const; - void openVAAPIDevice(const AVCodec * codec, int width, int height); + void openHardwareDevice( + const AVCodec * codec, enum AVHWDeviceType hwDevType, int width, int height); void setAVOption(const std::string & field, const std::string & value); // --------- variables rclcpp::Logger logger_; mutable std::recursive_mutex mutex_; Callback callback_; // config - std::string encoder_; // e.g. "libx264" - std::string preset_; // e.g. "slow", "medium", "lossless" - std::string profile_; // e.g. "main", "high", "rext" - std::string tune_; // e.g. "tune" - std::string delay_; // default is 4 frames for parallel processing. 0 is lowest latency - std::string crf_; // constant rate factor. 0 is lossless, 51 is worst quality - int qmax_{0}; // max allowed quantization. The lower the better quality - int GOPSize_{15}; // distance between two keyframes + std::string encoder_; // e.g. "libx264" + std::string encoding_; // e.g. "h264" + std::string preset_; // e.g. "slow", "medium", "lossless" + std::string profile_; // e.g. "main", "high", "rext" + std::string tune_; // e.g. "tune" + std::string delay_; // default is 4 frames for parallel processing. 0 is lowest latency + std::string crf_; // constant rate factor. 0 is lossless, 51 is worst quality + int qmax_{-1}; // max allowed quantization. The lower the better quality + int GOPSize_{-1}; // distance between two keyframes + int maxBFrames_{-1}; // maximum number of b-frames + int64_t bitRate_{0}; // max rate in bits/s + AVPixelFormat pixFormat_{AV_PIX_FMT_NONE}; AVRational timeBase_{1, 100}; AVRational frameRate_{100, 1}; - int64_t bitRate_{1000000}; bool usesHardwareFrames_{false}; // ------ libav state AVCodecContext * codecContext_{nullptr}; diff --git a/include/ffmpeg_encoder_decoder/utils.hpp b/include/ffmpeg_encoder_decoder/utils.hpp index 7a7da4e..ff11ba4 100644 --- a/include/ffmpeg_encoder_decoder/utils.hpp +++ b/include/ffmpeg_encoder_decoder/utils.hpp @@ -16,6 +16,7 @@ #ifndef FFMPEG_ENCODER_DECODER__UTILS_HPP_ #define FFMPEG_ENCODER_DECODER__UTILS_HPP_ +#include #include #include #include @@ -62,7 +63,29 @@ std::vector get_encoder_formats( * picks from a vector of formats the "best" pixel format for a given encoder */ enum AVPixelFormat get_preferred_pixel_format( - const std::string & encoder, const std::vector & fmts); + bool useHWFormat, const std::vector & fmts); + +/**! +* finds the names of all available decoders for a given encoding (or encoder) +*/ +void find_decoders( + const std::string & encoding, std::vector * hw_decoders, + std::vector * sw_decoders); + +/**! +* * get hardware device types +*/ +std::vector get_hwdevice_types(); + +/**! +* find encoding for given encoder +*/ +std::string find_encoding(const std::string & encoder); + +/**! 
+* gets hardware device type for specific codec +*/ +enum AVHWDeviceType find_hw_device_type(const AVCodec * codec); } // namespace utils } // namespace ffmpeg_encoder_decoder diff --git a/src/decoder.cpp b/src/decoder.cpp index 5458f21..dc63c0c 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -14,6 +14,7 @@ // limitations under the License. #include +#include #include #include #include @@ -23,13 +24,6 @@ namespace ffmpeg_encoder_decoder { -// default mappings -static const std::unordered_map defaultMap{ - {{"h264_nvenc", "h264"}, - {"libx264", "h264"}, - {"hevc_nvenc", "hevc_cuvid"}, - {"h264_nvmpi", "h264"}, - {"h264_vaapi", "h264"}}}; Decoder::Decoder() : logger_(rclcpp::get_logger("Decoder")) {} @@ -39,8 +33,6 @@ void Decoder::reset() { if (codecContext_) { avcodec_free_context(&codecContext_); - // avcodec_close(codecContext_); - // av_free(codecContext_); codecContext_ = NULL; } if (swsContext_) { @@ -56,31 +48,37 @@ void Decoder::reset() cpuFrame_ = NULL; av_free(colorFrame_); colorFrame_ = NULL; + hwPixFormat_ = AV_PIX_FMT_NONE; } -bool Decoder::initialize(const std::string & encoding, Callback callback, const std::string & dec) +bool Decoder::initialize( + const std::string & encoding, Callback callback, const std::string & decoder) { - std::string decoder = dec; - if (decoder.empty()) { - RCLCPP_INFO_STREAM(logger_, "no decoder for encoding: " << encoding); - return (false); - } - callback_ = callback; - encoding_ = encoding; - return (initDecoder(encoding_, decoder)); + return (initialize( + encoding, callback, + decoder.empty() ? std::vector() : std::vector{decoder})); } -static enum AVHWDeviceType get_hw_type(const std::string & name, rclcpp::Logger logger) +bool Decoder::initialize( + const std::string & encoding, Callback callback, const std::vector & decoders) { - enum AVHWDeviceType type = av_hwdevice_find_type_by_name(name.c_str()); - if (type == AV_HWDEVICE_TYPE_NONE) { - RCLCPP_INFO_STREAM(logger, "hw accel device is not supported: " << name); - RCLCPP_INFO_STREAM(logger, "available devices:"); - while ((type = av_hwdevice_iterate_types(type)) != AV_HWDEVICE_TYPE_NONE) - RCLCPP_INFO_STREAM(logger, av_hwdevice_get_type_name(type)); - return (type); + callback_ = callback; + encoding_ = encoding; + if (decoders.empty()) { + const auto all_decoders = findDecoders(encoding); + if (all_decoders.empty()) { + RCLCPP_ERROR_STREAM(logger_, "no decoders discovered for encoding " << encoding_); + throw(std::runtime_error("no decoders discovered for encoding " + encoding_)); + } + std::string decoders_str; + for (const auto & decoder : all_decoders) { + decoders_str += " " + decoder; + } + RCLCPP_INFO_STREAM(logger_, "trying discovered decoders in order:" << decoders_str); + + return (initDecoder(all_decoders)); } - return (type); + return (initDecoder(decoders)); } static AVBufferRef * hw_decoder_init( @@ -88,36 +86,19 @@ static AVBufferRef * hw_decoder_init( { int rc = av_hwdevice_ctx_create(hwDeviceContext, hwType, NULL, NULL, 0); if (rc < 0) { - RCLCPP_ERROR_STREAM(logger, "failed to create context for HW device: " << hwType); + RCLCPP_ERROR_STREAM( + logger, "failed to create context for HW device: " << av_hwdevice_get_type_name(hwType)); return (NULL); } + RCLCPP_INFO_STREAM(logger, "using hardware acceleration: " << av_hwdevice_get_type_name(hwType)); return (av_buffer_ref(*hwDeviceContext)); } -static std::unordered_map pix_format_map; - -static enum AVPixelFormat get_hw_format(AVCodecContext * ctx, const enum AVPixelFormat * pix_fmts) -{ - enum AVPixelFormat pf = 
pix_format_map[ctx]; - const enum AVPixelFormat * p; - for (p = pix_fmts; *p != -1; p++) { - if (*p == pf) { - return *p; - } - } - std::cerr << "Failed to get HW surface format." << std::endl; - return AV_PIX_FMT_NONE; -} - -static enum AVPixelFormat find_pix_format( - const std::string & codecName, enum AVHWDeviceType hwDevType, const AVCodec * codec, - const std::string & hwAcc, rclcpp::Logger logger) +static enum AVPixelFormat find_pix_format(enum AVHWDeviceType hwDevType, const AVCodec * codec) { for (int i = 0;; i++) { const AVCodecHWConfig * config = avcodec_get_hw_config(codec, i); if (!config) { - RCLCPP_WARN_STREAM( - logger, "decoder " << codecName << " does not support hw accel: " << hwAcc); return (AV_PIX_FMT_NONE); } if ( @@ -129,11 +110,136 @@ static enum AVPixelFormat find_pix_format( return (AV_PIX_FMT_NONE); } -bool Decoder::initDecoder(const std::string & encoding, const std::string & decoder) +// This function is a adapted version of avcodec_default_get_format + +enum AVPixelFormat get_format(struct AVCodecContext * avctx, const enum AVPixelFormat * fmt) +{ + const AVPixFmtDescriptor * desc; + const AVCodecHWConfig * config; + int i, n; +#ifdef DEBUG_PIXEL_FORMAT + if (avctx->codec) { + printf("codec is: %s\n", avctx->codec->name); + } + char buf[64]; + buf[63] = 0; + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) { + av_get_pix_fmt_string(buf, sizeof(buf) - 1, fmt[n]); + printf("offered pix fmt: %d = %s\n", fmt[n], buf); + } +#endif + // If a device was supplied when the codec was opened, assume that the + // user wants to use it. + if (avctx->hw_device_ctx && avcodec_get_hw_config(avctx->codec, 0)) { + AVHWDeviceContext * device_ctx = + reinterpret_cast(avctx->hw_device_ctx->data); + for (i = 0;; i++) { + config = avcodec_get_hw_config(avctx->codec, i); + if (!config) break; + if (!(config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) continue; + if (device_ctx->type != config->device_type) continue; + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) { + if (config->pix_fmt == fmt[n]) { +#ifdef DEBUG_PIXEL_FORMAT + av_get_pix_fmt_string(buf, sizeof(buf) - 1, fmt[n]); + printf("using pix fmt: %d = %s\n", fmt[n], buf); +#endif + return fmt[n]; + } + } + } + } + // No device or other setup, so we have to choose from things which + // don't any other external information. + + // If the last element of the list is a software format, choose it + // (this should be best software format if any exist). + + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) { + } + desc = av_pix_fmt_desc_get(fmt[n - 1]); + if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL)) { +#ifdef DEBUG_PIXEL_FORMAT + av_get_pix_fmt_string(buf, sizeof(buf) - 1, fmt[n - 1]); + printf("using unaccelerated last fmt: %d = %s\n", fmt[n - 1], buf); +#endif + return fmt[n - 1]; + } + + // Finally, traverse the list in order and choose the first entry + // with no external dependencies (if there is no hardware configuration + // information available then this just picks the first entry). + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) { + for (i = 0;; i++) { + config = avcodec_get_hw_config(avctx->codec, i); + if (!config) break; + if (config->pix_fmt == fmt[n]) break; + } + if (!config) { + // No specific config available, so the decoder must be able + // to handle this format without any additional setup. 
+#ifdef DEBUG_PIXEL_FORMAT + av_get_pix_fmt_string(buf, sizeof(buf) - 1, fmt[n]); + printf("handle without setup %d = %s\n", fmt[n], buf); +#endif + return fmt[n]; + } + if (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL) { +// Usable with only internal setup. +#ifdef DEBUG_PIXEL_FORMAT + av_get_pix_fmt_string(buf, sizeof(buf) - 1, fmt[n]); + printf("handle with internal setup %d = %s\n", fmt[n], buf); +#endif + return fmt[n]; + } + } + // Nothing is usable, give up. + return AV_PIX_FMT_NONE; +} + +bool Decoder::initDecoder(const std::vector & decoders) +{ + if (decoders.empty()) { + RCLCPP_ERROR_STREAM(logger_, "no decoders configured for this encoding!"); + throw(std::runtime_error("no decoders configured for this encoding!")); + } + for (const auto & decoder : decoders) { + const AVCodec * codec = avcodec_find_decoder_by_name(decoder.c_str()); + if (codec) { + // use the decoder if it either is software, or has working + // hardware support + if (codec->capabilities & AV_CODEC_CAP_HARDWARE) { + const AVCodecHWConfig * hwConfig = avcodec_get_hw_config(codec, 0); + if (hwConfig) { + if (initSingleDecoder(decoder)) { + return (true); + } + } else { + RCLCPP_INFO_STREAM(logger_, "ignoring decoder with no hardware config: " << decoder); + } + } else { + if (initSingleDecoder(decoder)) { + return (true); + } + } + } else { + RCLCPP_WARN_STREAM(logger_, "unknown decoder: " << decoder); + } + } + if (decoders.size() > 1) { + RCLCPP_ERROR_STREAM(logger_, "none of these requested decoders works: "); + for (const auto & decoder : decoders) { + RCLCPP_ERROR_STREAM(logger_, " " << decoder); + } + } + throw(std::runtime_error("cannot find matching decoder!")); +} + +bool Decoder::initSingleDecoder(const std::string & decoder) { try { - const AVCodec * codec = NULL; - codec = avcodec_find_decoder_by_name(decoder.c_str()); + // utils::get_decoders_for_encoding(); // initialize the map + const AVCodec * codec = avcodec_find_decoder_by_name(decoder.c_str()); if (!codec) { RCLCPP_ERROR_STREAM(logger_, "cannot find decoder " << decoder); throw(std::runtime_error("cannot find decoder " + decoder)); @@ -145,30 +251,35 @@ bool Decoder::initDecoder(const std::string & encoding, const std::string & deco throw(std::runtime_error("alloc context failed!")); } av_opt_set_int(codecContext_, "refcounted_frames", 1, 0); - const std::string hwAcc("cuda"); - enum AVHWDeviceType hwDevType = get_hw_type(hwAcc, logger_); + enum AVHWDeviceType hwDevType = AV_HWDEVICE_TYPE_NONE; + if (codec->capabilities & AV_CODEC_CAP_HARDWARE) { + const AVCodecHWConfig * hwConfig = avcodec_get_hw_config(codec, 0); + if (hwConfig) { + hwDevType = hwConfig->device_type; + RCLCPP_INFO_STREAM( + logger_, + "decoder " << decoder << " has hw accelerator: " << av_hwdevice_get_type_name(hwDevType)); + } else { + RCLCPP_WARN_STREAM(logger_, "decoder " << decoder << " does not have hw acceleration!"); + } + } else { + RCLCPP_INFO_STREAM(logger_, "decoder " << decoder << " has no hardware acceleration"); + } // default - hwPixFormat_ = AV_PIX_FMT_NONE; - if (hwDevType != AV_HWDEVICE_TYPE_NONE) { codecContext_->hw_device_ctx = hw_decoder_init(&hwDeviceContext_, hwDevType, logger_); if (codecContext_->hw_device_ctx != NULL) { - hwPixFormat_ = find_pix_format(encoding, hwDevType, codec, hwAcc, logger_); - // must put in global hash for the callback function - pix_format_map[codecContext_] = hwPixFormat_; - codecContext_->get_format = get_hw_format; - } else { // hardware couldn't be initialized. 
- hwDevType = AV_HWDEVICE_TYPE_NONE; + hwPixFormat_ = find_pix_format(hwDevType, codec); + codecContext_->get_format = get_format; } } codecContext_->pkt_timebase = timeBase_; if (avcodec_open2(codecContext_, codec, NULL) < 0) { - RCLCPP_ERROR_STREAM(logger_, "open context failed for " + decoder); av_free(codecContext_); codecContext_ = NULL; codec = NULL; - throw(std::runtime_error("open context failed!")); + throw(std::runtime_error("open context failed for " + decoder)); } decodedFrame_ = av_frame_alloc(); cpuFrame_ = (hwPixFormat_ == AV_PIX_FMT_NONE) ? NULL : av_frame_alloc(); @@ -281,6 +392,24 @@ void Decoder::printTimers(const std::string & prefix) const const std::unordered_map & Decoder::getDefaultEncoderToDecoderMap() { - return (defaultMap); + RCLCPP_INFO_STREAM( + rclcpp::get_logger("ffmpeg_decoder"), + "default map is deprecated, use findDecoders() or pass empty string instead!"); + throw(std::runtime_error("default map is deprecated!")); +} + +void Decoder::findDecoders( + const std::string & encoding, std::vector * hw_decoders, + std::vector * sw_decoders) +{ + utils::find_decoders(encoding, hw_decoders, sw_decoders); +} + +std::vector Decoder::findDecoders(const std::string & encoding) +{ + std::vector sw_decoders, all_decoders; + utils::find_decoders(encoding, &all_decoders, &sw_decoders); + all_decoders.insert(all_decoders.end(), sw_decoders.begin(), sw_decoders.end()); + return (all_decoders); } } // namespace ffmpeg_encoder_decoder diff --git a/src/encoder.cpp b/src/encoder.cpp index ed6ca85..8718d45 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -43,6 +43,13 @@ void Encoder::reset() closeCodec(); } +void Encoder::setEncoder(const std::string & n) +{ + Lock lock(mutex_); + encoder_ = n; + encoding_ = utils::find_encoding(encoder_); +} + static void free_frame(AVFrame ** frame) { if (*frame) { @@ -94,10 +101,11 @@ bool Encoder::initialize(int width, int height, Callback callback) return (openCodec(width, height)); } -void Encoder::openVAAPIDevice(const AVCodec * codec, int width, int height) +void Encoder::openHardwareDevice( + const AVCodec * codec, enum AVHWDeviceType hwDevType, int width, int height) { int err = 0; - err = av_hwdevice_ctx_create(&hwDeviceContext_, AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0); + err = av_hwdevice_ctx_create(&hwDeviceContext_, hwDevType, NULL, NULL, 0); utils::check_for_err("cannot create hw device context", err); AVBufferRef * hw_frames_ref = av_hwframe_ctx_alloc(hwDeviceContext_); if (!hw_frames_ref) { @@ -105,11 +113,11 @@ void Encoder::openVAAPIDevice(const AVCodec * codec, int width, int height) } AVHWFramesContext * frames_ctx = reinterpret_cast(hw_frames_ref->data); - frames_ctx->format = utils::find_hw_config(&usesHardwareFrames_, AV_HWDEVICE_TYPE_VAAPI, codec); + frames_ctx->format = utils::find_hw_config(&usesHardwareFrames_, hwDevType, codec); if (usesHardwareFrames_) { const auto fmts = utils::get_hwframe_transfer_formats(hw_frames_ref); - frames_ctx->sw_format = utils::get_preferred_pixel_format("h264_vaapi", fmts); + frames_ctx->sw_format = utils::get_preferred_pixel_format(true, fmts); if (pixFormat_ != AV_PIX_FMT_NONE) { RCLCPP_INFO_STREAM( logger_, "user overriding software pix fmt " << utils::pix(frames_ctx->sw_format)); @@ -130,14 +138,14 @@ void Encoder::openVAAPIDevice(const AVCodec * codec, int width, int height) frames_ctx->initial_pool_size = 20; if ((err = av_hwframe_ctx_init(hw_frames_ref)) < 0) { av_buffer_unref(&hw_frames_ref); - utils::throw_err("failed to initialize VAAPI frame context", err); + 
utils::throw_err("failed to initialize hardware frame context", err); } codecContext_->hw_frames_ctx = av_buffer_ref(hw_frames_ref); av_buffer_unref(&hw_frames_ref); if (codecContext_->hw_frames_ctx == nullptr) { - throw(std::runtime_error("vaapi: cannot create buffer ref!")); + throw(std::runtime_error("hardware decoder: cannot create buffer ref!")); } } @@ -183,17 +191,20 @@ void Encoder::doOpenCodec(int width, int height) auto pixFmts = utils::get_encoder_formats(codecContext_, codec); - codecContext_->bit_rate = bitRate_; - codecContext_->qmax = qmax_; // 0: highest, 63: worst quality bound codecContext_->width = width; codecContext_->height = height; codecContext_->time_base = timeBase_; codecContext_->framerate = frameRate_; + codecContext_->bit_rate = bitRate_; + codecContext_->qmax = qmax_; // 0: highest, 63: worst quality bound codecContext_->gop_size = GOPSize_; - codecContext_->max_b_frames = 0; // nvenc can only handle zero! + codecContext_->max_b_frames = maxBFrames_; // nvenc can only handle zero! - if (encoder_.find("vaapi") != std::string::npos) { - openVAAPIDevice(codec, width, height); + const enum AVHWDeviceType hwDevType = utils::find_hw_device_type(codec); + if (hwDevType != AV_HWDEVICE_TYPE_NONE) { + RCLCPP_INFO_STREAM( + logger_, encoder_ << " uses hw device: " << av_hwdevice_get_type_name(hwDevType)); + openHardwareDevice(codec, hwDevType, width, height); } if (usesHardwareFrames_) { @@ -202,7 +213,9 @@ void Encoder::doOpenCodec(int width, int height) codecContext_->sw_pix_fmt = frames_ctx->sw_format; codecContext_->pix_fmt = frames_ctx->format; } else { - codecContext_->pix_fmt = utils::get_preferred_pixel_format(encoder_, pixFmts); + codecContext_->pix_fmt = (pixFormat_ != AV_PIX_FMT_NONE) + ? pixFormat_ + : utils::get_preferred_pixel_format(false, pixFmts); codecContext_->sw_pix_fmt = codecContext_->pix_fmt; } @@ -218,7 +231,7 @@ void Encoder::doOpenCodec(int width, int height) encoder_.c_str(), profile_.c_str(), preset_.c_str(), bitRate_, qmax_); err = avcodec_open2(codecContext_, codec, NULL); - utils::check_for_err("cannot open codec", err); + utils::check_for_err("cannot open codec " + encoder_, err); RCLCPP_INFO_STREAM(logger_, "opened codec: " << encoder_); frame_ = av_frame_alloc(); @@ -333,14 +346,13 @@ void Encoder::encodeImage(const cv::Mat & img, const Header & header, const rclc } ret = avcodec_send_frame(codecContext_, usesHardwareFrames_ ? 
hw_frame_ : frame_); - if (measurePerformance_) { t3 = rclcpp::Clock().now(); tdiffSendFrame_.update((t3 - t2).seconds()); } // now drain all packets while (ret == 0) { - ret = drainPacket(header, img.cols, img.rows); + ret = drainPacket(header.frame_id, img.cols, img.rows); } if (measurePerformance_) { const rclcpp::Time t4 = rclcpp::Clock().now(); @@ -348,18 +360,20 @@ void Encoder::encodeImage(const cv::Mat & img, const Header & header, const rclc } } -void Encoder::flush(const Header & header) +void Encoder::flush(const std::string & frame_id) { if (!frame_) { return; } int ret = avcodec_send_frame(codecContext_, nullptr); while (ret == 0) { - ret = drainPacket(header, frame_->width, frame_->height); + ret = drainPacket(frame_id, frame_->width, frame_->height); } } -int Encoder::drainPacket(const Header & header, int width, int height) +void Encoder::flush(const Header & header) { flush(header.frame_id); } + +int Encoder::drainPacket(const std::string & frame_id, int width, int height) { rclcpp::Time t0, t1, t2; if (measurePerformance_) { @@ -379,8 +393,7 @@ int Encoder::drainPacket(const Header & header, int width, int height) } auto it = ptsToStamp_.find(pk.pts); if (it != ptsToStamp_.end()) { - callback_( - header.frame_id, it->second, encoder_, width, height, pk.pts, pk.flags, pk.data, pk.size); + callback_(frame_id, it->second, encoding_, width, height, pk.pts, pk.flags, pk.data, pk.size); if (measurePerformance_) { const auto t3 = rclcpp::Clock().now(); tdiffPublish_.update((t3 - t2).seconds()); diff --git a/src/utils.cpp b/src/utils.cpp index 05f6779..7be4bee 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -16,8 +16,10 @@ #include #include #include +#include #include #include +#include extern "C" { #include @@ -57,6 +59,17 @@ void check_for_err(const std::string & msg, int errnum) } } +enum AVHWDeviceType find_hw_device_type(const AVCodec * codec) +{ + if (codec->capabilities & AV_CODEC_CAP_HARDWARE) { + const AVCodecHWConfig * config = avcodec_get_hw_config(codec, 0); + if (config) { + return (config->device_type); + } + } + return (AV_HWDEVICE_TYPE_NONE); +} + enum AVPixelFormat find_hw_config( bool * usesHWFrames, enum AVHWDeviceType hwDevType, const AVCodec * codec) { @@ -83,10 +96,10 @@ static bool has_format(const std::vector & fmts, const AVPixelFor } enum AVPixelFormat get_preferred_pixel_format( - const std::string & encoder, const std::vector & fmts) + bool useHWFormat, const std::vector & fmts) { - // the only format that worked for vaapi was NV12 - if (encoder.find("vaapi") != std::string::npos) { + if (useHWFormat) { + // the hardware encoders typically use nv12. return (has_format(fmts, AV_PIX_FMT_NV12) ? 
AV_PIX_FMT_NV12 : AV_PIX_FMT_NONE); } if (has_format(fmts, AV_PIX_FMT_BGR24)) { @@ -133,8 +146,96 @@ std::vector get_hwframe_transfer_formats(AVBufferRef * hwfra formats.push_back(*f); } } + if (fmts) { + av_free(fmts); + } return (formats); } +// find codec by name +static const AVCodec * find_by_name(const std::string & name) +{ + const AVCodec * c; + void * iter = nullptr; + while ((c = av_codec_iterate(&iter))) { + if (c->name == name) { + return (c); + } + } + return (nullptr); +} +static void find_decoders( + AVCodecID encoding, std::vector * decoders, bool with_hw_support) +{ + const AVCodec * c; + void * iter = nullptr; + while ((c = av_codec_iterate(&iter))) { + if (av_codec_is_decoder(c) && c->id == encoding) { + if (with_hw_support) { + if ((c->capabilities & AV_CODEC_CAP_HARDWARE) && (avcodec_get_hw_config(c, 0) != nullptr)) { + decoders->push_back(c->name); + } + } else { + if (!(c->capabilities & AV_CODEC_CAP_HARDWARE)) { + decoders->push_back(c->name); + } + } + } + } +} + +// This function finds the encoding that is the target of a given encoder. +// of {id_of(hevc)} + +static AVCodecID find_id_for_encoder_or_encoding(const std::string & encoder) +{ + const AVCodec * c = find_by_name(encoder); + if (!c) { + const AVCodecDescriptor * desc = NULL; + while ((desc = avcodec_descriptor_next(desc)) != nullptr) { + if (desc->name == encoder) { + return (desc->id); + } + } + throw(std::runtime_error("unknown encoder: " + encoder)); + } + return (c->id); +} + +void find_decoders( + const std::string & encoding, std::vector * hw_decoders, + std::vector * sw_decoders) +{ + // in case the passed in encoding is actually an encoder... + const auto real_encoding = find_id_for_encoder_or_encoding(encoding); + // first use hw accelerated codecs, then software + find_decoders(real_encoding, hw_decoders, true); + find_decoders(real_encoding, sw_decoders, false); +} + +std::string find_encoding(const std::string & encoder) +{ + const AVCodec * c = find_by_name(encoder); + if (!c) { + throw(std::runtime_error("unknown encoder: " + encoder)); + } + const AVCodecDescriptor * desc = NULL; + while ((desc = avcodec_descriptor_next(desc)) != nullptr) { + if (desc->id == c->id) { + return (desc->name); + } + } + throw(std::runtime_error("weird ffmpeg config error???")); +} + +std::vector get_hwdevice_types() +{ + std::vector types; + for (enum AVHWDeviceType type = av_hwdevice_iterate_types(AV_HWDEVICE_TYPE_NONE); + type != AV_HWDEVICE_TYPE_NONE; type = av_hwdevice_iterate_types(type)) { + types.push_back(av_hwdevice_get_type_name(type)); + } + return (types); +} } // namespace utils } // namespace ffmpeg_encoder_decoder diff --git a/test/encoder_test.cpp b/test/encoder_test.cpp index a9cb44e..38cca60 100644 --- a/test/encoder_test.cpp +++ b/test/encoder_test.cpp @@ -21,6 +21,7 @@ #include static const char * g_codec = "libx264"; +static const char * g_encoding = "h264"; static const char * g_frame_id = "frame_id"; static const int g_width = 1920; // must be mult of 64 for some codecs! 
static const int g_height = 1080; @@ -39,7 +40,7 @@ void packetReady( EXPECT_EQ(static_cast(width), g_width); EXPECT_EQ(static_cast(height), g_height); EXPECT_EQ(frame_id, g_frame_id); - EXPECT_EQ(codec, g_codec); + EXPECT_EQ(codec, g_encoding); g_frame_counter++; } From 5eed6381ace7913524eb15fa2b42c1c685ef0829 Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Fri, 18 Jul 2025 01:38:40 -0400 Subject: [PATCH 02/12] decoder flush(), decoder testing, improved api docs --- CMakeLists.txt | 4 +- README.md | 63 ++++- include/ffmpeg_encoder_decoder/decoder.hpp | 210 ++++++++++---- include/ffmpeg_encoder_decoder/encoder.hpp | 308 +++++++++++++++++---- include/ffmpeg_encoder_decoder/pts_map.hpp | 35 +++ include/ffmpeg_encoder_decoder/utils.hpp | 125 ++++++--- package.xml | 3 +- src/decoder.cpp | 180 +++++++----- src/encoder.cpp | 60 ++-- src/utils.cpp | 46 ++- test/encoder_test.cpp | 291 ++++++++++++++++--- 11 files changed, 1034 insertions(+), 291 deletions(-) create mode 100644 include/ffmpeg_encoder_decoder/pts_map.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 15a82ce..615b2b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,13 +138,15 @@ if(BUILD_TESTING) ament_pep257() ament_xmllint() + find_package(ffmpeg_image_transport_msgs) find_package(ament_cmake_gtest REQUIRED) ament_add_gtest(${PROJECT_NAME}_encoder_test test/encoder_test.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test) target_include_directories(${PROJECT_NAME}_encoder_test PUBLIC $ $) - target_link_libraries(${PROJECT_NAME}_encoder_test ${PROJECT_NAME}) + target_link_libraries(${PROJECT_NAME}_encoder_test + ${ffmpeg_image_transport_msgs_TARGETS} ${PROJECT_NAME}) endif() ament_export_include_directories(include) diff --git a/README.md b/README.md index 7104c07..4046129 100644 --- a/README.md +++ b/README.md @@ -34,11 +34,68 @@ and follow the [instructions here](https://github.com/ros-misc-utilities/.github Make sure to source your workspace's ``install/setup.bash`` afterwards. -## Parameters +## ROS Parameters -This package has no parameters. It is the upper layer's responsibility to e.g. manage the mapping between encoder and decoder, i.e. to tell the decoder class which libav decoder should be used for the decoding, or to set the encoding parameters. +This package does not expose ROS parameters. It is the upper layer's responsibility to e.g. manage the mapping between encoder and decoder, i.e. to tell the decoder class which libav decoder should be used for the decoding, or to set the encoding parameters. -### How to use a custom version of libav (aka ffmpeg) + +## API usage + +### Encoder + +Using the encoder involves the following steps: +- instantiating the ``Encoder`` object. +- setting properties like the libav encoder to use, the encoding formats, and AV options. +- initializing the encoder object. This requires knowledge of the image size and therefore + can only be done when the first image is available. Note that many properties + (encoding formats etc) must have been set *before* initializing the encoder. +- feeding images to the encoder (and handling the callbacks when encoded packets become available) +- flushing the encoder (may result in additional callbacks with encoded packets) +- destroying the ``Encoder`` object. + +The ``Encoder`` class description has a short example code snippet. + +When using libav it is important to understand the difference between the encoder and the codec. The *codec* is the standardized format in which images are encoded, for instance ``h264`` or ``hevc``. 
The *encoder* is a software module that can encode images for a given codec. For instance ``libx264``, ``libx264rgb``, ``h264_nvenc``, and ``h264_vaapi`` are all *encoders* that encode for the *codec* h264.
+Some of the encoders are hardware accelerated, some can handle more image formats than others, but in the end they all encode video for a specific codec. You set the libav encoder with the ``setEncoder()`` method.
+
+For the many AV options available for the various libav encoders, and for ``qmax``, ``bit_rate``, and similar settings, please refer to the ffmpeg documentation.
+
+The topic of "pixel formats" (also called "encodings") can be very confusing and frustrating, so here are a few lines about it.
+- Images in ROS arrive as messages that are encoded in one of the formats allowed by the [sensor\_msgs/Image encodings](https://github.com/ros2/common_interfaces/blob/a2ef867438e6d4eed074d6e3668ae45187e7de86/sensor_msgs/include/sensor_msgs/image_encodings.hpp). If you pass that message in via the ``encodeImage(const Image & msg)`` method, it will first be converted to an OpenCV matrix using the [cv_bridge](https://github.com/ros-perception/vision_opencv).
+If you don't set the ``cv_bridge_target_format`` via ``setCVBridgeTargetFormat(const std::string & fmt)``, the image will be converted to the default format of ``bgr8``. Depending on how performance sensitive you are (or if you have a ``mono8`` (gray) image!), this may not be what you want. Ideally, you should pick a ``cv_bridge_target_format`` that can be directly used by the libav encoder that you have chosen. You can use ffmpeg to list the formats that the libav encoder directly supports:
+  ```
+  ffmpeg -h encoder=libx264 | grep formats
+  ```
+  Note that the ffmpeg/libav format strings often diverge from the ROS encoding strings, so some guesswork and experimentation may be necessary to pick the right ``cv_bridge_target_format``.
+  If you choose to bypass the cv\_bridge conversion by feeding the images to the encoder directly via the ``encodeImage(const cv::Mat & img ...)`` method, you must still set the ``cv_bridge_target_format`` so that the encoder knows what format the ``img`` argument has.
+- Once the image is available in ``cv_bridge_target_format``, the encoder may perform another conversion to an image format that the libav encoder supports, the ``av_source_pixel_format``. Again, ``ffmpeg -h encoder=<encoder_name>`` shows the supported formats. If you don't set the ``av_source_pixel_format`` with ``setAVSourcePixelFormat()``, the encoder will try to pick one that is supported by the libav encoder. That often works but may not be optimal.
+- Finally, the libav encoder produces a packet in its output codec, e.g. ``h264``, ``hevc`` etc. This encoding format is provided as the ``codec`` parameter when the encoder calls back with the encoded packet. Later, the codec needs to be provided to the decoder so it can pick a suitable libav decoder.
+
+To summarize, the conversion chain goes as follows:
+```
+<ROS image encoding> -> cv_bridge_target_format -> av_source_pixel_format --(libav encoder)--> codec
+```
+By properly choosing ``cv_bridge_target_format`` and ``av_source_pixel_format``, two of those conversions
+may become no-ops, but to what extent cv\_bridge and libav actually recognize and optimize for this has not been looked into yet.
+
+Note that only very few combinations of libav encoders, ``cv_bridge_target_format``, and ``av_source_pixel_format`` have been tested. Please provide feedback if you observe crashes or find obvious bugs. PRs are always appreciated!
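+
+For orientation, here is a minimal sketch of how the format-related setters fit into the encoder setup. The encoder name and formats are examples only, and ``width``, ``height``, ``image_msg``, and ``packetReady`` are placeholders you must provide yourself (see the ``Encoder`` class documentation for the full callback signature):
+```
+#include <ffmpeg_encoder_decoder/encoder.hpp>
+
+ffmpeg_encoder_decoder::Encoder enc;
+enc.setEncoder("libx264");             // libav encoder, produces the "h264" codec
+enc.setCVBridgeTargetFormat("mono8");  // format cv_bridge converts the ROS image to
+enc.setAVSourcePixelFormat("gray");    // libav pixel format fed into the encoder
+if (!enc.initialize(width, height, packetReady)) {  // size is known from the first image
+  // handle initialization failure here
+}
+enc.encodeImage(image_msg);            // may invoke packetReady() with encoded packets
+enc.flush();                           // drain remaining packets, may also invoke packetReady()
+```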
+ +### Decoder + +Using the decoder involves the following steps: +- instantiating the ``Decoder`` object. +- if so desired, setting the ROS output (image encoding) format. +- initializing the decoder object. For this you need to know the encoding (codec, e.g. "h264"). + During initialization you also have to present an ordered list of libav decoders that + should be used. If an empty list is provided, the decoder will attempt to find a suitable + libav decoder based on the encoding, with a preference for hardware accelerated decoding. +- feeding encoded packets to the decoder (and handling the callbacks + when decoded images become available) +- flushing the decoder (may result in additional callbacks with decoded images) +- destroying the ``Decoder`` object. +The ``Decoder`` class description has a short example code snippet. + +## How to use a custom version of libav (aka ffmpeg) Compile *and install* ffmpeg. Let's say the install directory is ``/home/foo/ffmpeg/build``, then for it to be found while building, diff --git a/include/ffmpeg_encoder_decoder/decoder.hpp b/include/ffmpeg_encoder_decoder/decoder.hpp index 727a7ea..57dbaf2 100644 --- a/include/ffmpeg_encoder_decoder/decoder.hpp +++ b/include/ffmpeg_encoder_decoder/decoder.hpp @@ -16,6 +16,7 @@ #ifndef FFMPEG_ENCODER_DECODER__DECODER_HPP_ #define FFMPEG_ENCODER_DECODER__DECODER_HPP_ +#include #include #include #include @@ -36,114 +37,215 @@ extern "C" { namespace ffmpeg_encoder_decoder { +/** + * \brief Decodes ffmpeg encoded messages via libav (ffmpeg). + * + * The Decoder class facilitates decoding of messages that have been encoded with the + * Encoder class by leveraging libav, the collection of libraries used by ffmpeg. + * Sample code: + ``` +void imageCallback(const sensor_msgs::msg::Image::ConstSharedPtr & img, bool isKeyFrame) +{ + // process decoded image here... +} + +ffmpeg_encoder_decoder::Decoder decoder; +ffmpeg_image_transport_msgs::msg::FFMPEGPacket msg; +msg.header.frame_id = "frame_id"; +msg.width = 640; +msg.height = 480; +msg.encoding = "hevc"; +msg.data.resize(10000, 0); // Obviously this is not a valid packet!!! + +if (!decoder.isInitialized()) { + decoder.initialize(msg.encoding, imageCallback, "hevc_cuvid"); +} + +for (int64_t i = 0; i < 10; i++) { + msg.header.stamp = rclcpp::Time(i, RCL_SYSTEM_TIME); + if (!decoder.decodePacket( + msg.encoding, &msg.data[0], msg.data.size(), msg.pts, msg.header.frame_id, + msg.header.stamp)) { + throw(std::runtime_error("error decoding packet!")); + } +} +decoder.flush(); +``` + */ class Decoder { public: + /** + * \brief callback function signature + * \param img pointer to decoded image + * \param isKeyFrame true if the decoded image is a keyframe + */ using Callback = std::function; - using PTSMap = std::unordered_map; + /** + * \brief Constructor. + */ Decoder(); + + /** + * \brief Destructor. Calls reset(); + */ ~Decoder(); + /** * Test if decoder is initialized. - * @return true if the decoder is initialized. + * \return true if the decoder is initialized. */ bool isInitialized() const { return (codecContext_ != NULL); } + /** - * Initialize decoder, with multiple decoders to pick from. - * Will pick hardware accelerated decoders first if available. - * If decoders.empty() a default decoder will be chosen (if available). - * @param encoding the encoding from the first packet. Can never change! - * @param callback the function to call when frame has been decoded - * @param decoder the decoder to use. 
If empty string, - * the decoder will try to find a suitable one based on the encoding - * @return true if successful - */ - - bool initialize(const std::string & encoding, Callback callback, const std::string & decoder); - /** - * Initialize decoder with multiple decoders to pick from. - * Will pick hardware accelerated decoders first if available. - * If decoders.empty() a default decoder will be chosen (if available). - * @param encoding the encoding from the first packet. Can never change! - * @param callback the function to call when frame has been decoded - * @param decoders the set of decoders to try sequentially. If empty() - * the decoder will try to find a suitable one based on the encoding - * @return true if successful - */ + * \brief Initializes the decoder for a given codec and libav decoder. + * + * Initializes the decoder, with multiple decoders to pick from. + * If the name of the libav decoder string is empty, a suitable libav decoder + * will be picked, or the initialization will fail if none is available. + * \param codec the codec (encoding) from the first packet. Can never change! + * \param callback the function to call when frame has been decoded. + * \param decoder the name of the libav decoder to use. If empty string, + * the decoder will try to find a suitable one based on the encoding. + * \return true if initialized successfully. + */ + bool initialize(const std::string & codec, Callback callback, const std::string & decoder); + + /** + * \ brief initializes the decoder, trying different libavdecoders in order. + * + * Initialize decoder with multiple libav decoders to pick from. + * If decoders.empty() a default decoder will be chosen (if available). + * \param codec the codec (encoding) from the first packet. Can never change! + * \param callback the function to call when frame has been decoded. + * \param decoders names of the libav decoders to try sequentially. If empty() + * the decoder will try to find a suitable one based on the codec. + * \return true if successful + */ bool initialize( - const std::string & encoding, Callback callback, const std::vector & decoders); + const std::string & codec, Callback callback, const std::vector & decoders); + /** - * Clears all decoder state but not timers, loggers, and other settings. + * \brief Sets the ROS output message encoding format. + * + * Sets the ROS output message encoding format. Must be compatible with one of the + * libav encoding formats, or else exception will be thrown. If not set, + * the output encoding will default to bgr8. + * \param output_encoding defaults to bgr8 + * \throw std::runtime_error() if no matching libav pixel format could be found + */ + void setOutputMessageEncoding(const std::string & output_encoding); + + /** + * \brief Clears all decoder state except for timers, loggers, and other settings. */ void reset(); + /** + * \brief Decodes encoded packet. + * * Decodes packet. Decoder must have been initialized beforehand. Calling this * function may result in callback with decoded frame. 
- * @param encoding the name of the encoding (typically from msg encoding) - * @param data pointer to packet data - * @param size size of packet data - * @param pts presentation time stamp of data packet - * @param frame_id ros frame id (from message header) - * @param stamp ros message header time stamp - */ + * \param encoding the name of the encoding/codec (typically from msg encoding) + * \param data pointer to packet data + * \param size size of packet data + * \param pts presentation time stamp of data packet + * \param frame_id ros frame id (from message header) + * \param stamp ros message header time stamp + * \return true if decoding was successful + */ bool decodePacket( const std::string & encoding, const uint8_t * data, size_t size, uint64_t pts, const std::string & frame_id, const rclcpp::Time & stamp); + /** - * Override default logger - * @param logger the logger to override the default with - */ - void setLogger(rclcpp::Logger logger) { logger_ = logger; } + * \brief Flush decoder. + * + * This method can only be called once at the end of the decoding stream. + * It will force any buffered packets to be delivered as frames. No further + * packets can be fed to the decoder after calling flush(). + * \return true if flush was successful (libav returns EOF) + */ + bool flush(); + /** - * deprecated, don't use! + * \brief Overrides the default ("Decoder") logger. + * \param logger the logger to override the default ("Decoder") with. */ - static const std::unordered_map & getDefaultEncoderToDecoderMap(); + void setLogger(rclcpp::Logger logger) { logger_ = logger; } + /** - * Finds the name of hardware and software decoders that match a - * certain encoding (or encoder) + * \brief Finds all hardware and software decoders for a given codec. + * + * Finds the name of all hardware and software decoders that match + * a certain codec (or encoder). + * \param codec name of the codec, i.e. h264, hevc etc + * \param hw_decoders non-null pointer for returning list of hardware decoders + * \param sw_decoders non-null pointer for returning list of software decoders */ static void findDecoders( - const std::string & encoding, std::vector * hw_decoders, + const std::string & codec, std::vector * hw_decoders, std::vector * sw_decoders); + /** - * Finds the name of all hardware and software decoders that match - * a certain encoding (or encoder) - */ - static std::vector findDecoders(const std::string & encoding); + * \brief Finds all decoders that can decode a given codec. + * + * Finds the name of all hardware and software decoders (combined) + * that match a certain codec (or encoder). + * \param codec name of the codec, i.e. h264, hevc etc + * \return vector with names of matching libav decoders + */ + static std::vector findDecoders(const std::string & codec); + /** - * For performance debugging + * \brief Enables or disables performance measurements. Poorly tested, may be broken. + * \param p set to true to enable performance debugging. */ void setMeasurePerformance(bool p) { measurePerformance_ = p; } + /** - * For performance debugging + * \brief Prints performance timers. Poorly tested, may be broken. + * \param prefix for labeling the printout */ void printTimers(const std::string & prefix) const; + /** - * For performance debugging + * \brief resets performance debugging timers. Poorly tested, may be broken. */ void resetTimers(); + // ------------------- deprecated functions --------------- + /** + * \deprecated Use findDecoders(codec) instead. 
+ */ + [[deprecated( + "use findDecoders(codec) now.")]] static const std::unordered_map & + getDefaultEncoderToDecoderMap(); + private: - rclcpp::Logger logger_; bool initSingleDecoder(const std::string & decoder); bool initDecoder(const std::vector & decoders); + int receiveFrame(); // --------------- variables + rclcpp::Logger logger_; Callback callback_; - PTSMap ptsToStamp_; // mapping of header - + PTSMap ptsToStamp_; // --- performance analysis bool measurePerformance_{false}; TDiff tdiffTotal_; - // --- libav stuff + // --- libav related variables AVRational timeBase_{1, 100}; std::string encoding_; AVCodecContext * codecContext_{NULL}; - AVFrame * decodedFrame_{NULL}; + AVFrame * swFrame_{NULL}; AVFrame * cpuFrame_{NULL}; - AVFrame * colorFrame_{NULL}; + AVFrame * outputFrame_{NULL}; SwsContext * swsContext_{NULL}; enum AVPixelFormat hwPixFormat_; + std::string outputMsgEncoding_; + enum AVPixelFormat outputAVPixFormat_ { AV_PIX_FMT_NONE }; + int bitsPerPixel_; // output format bits/pixel including padding AVPacket packet_; AVBufferRef * hwDeviceContext_{NULL}; }; diff --git a/include/ffmpeg_encoder_decoder/encoder.hpp b/include/ffmpeg_encoder_decoder/encoder.hpp index 3d6637b..bacaee3 100644 --- a/include/ffmpeg_encoder_decoder/encoder.hpp +++ b/include/ffmpeg_encoder_decoder/encoder.hpp @@ -16,6 +16,7 @@ #ifndef FFMPEG_ENCODER_DECODER__ENCODER_HPP_ #define FFMPEG_ENCODER_DECODER__ENCODER_HPP_ +#include #include #include #include @@ -39,79 +40,199 @@ extern "C" { namespace ffmpeg_encoder_decoder { +/** + * \brief Encodes ROS image messages via libav (ffmpeg). + * + * The Encoder class facilitates encoding of ROS images by leveraging libav, + * the collection of libraries used by ffmpeg. + * Sample code: + ``` + // handle the encoded packet in this function + void packetReady( + const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, + uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, size_t sz) { + // do something useful here ... + } + + ffmpeg_encoder_decoder::Encoder enc; + enc.setEncoder("libx265"); // set libav encoder + // note: will not be lossless unless we happen to have right pixel format + enc.setAVOption("x265-params", "lossless=1"); + enc.setAVOption("crf", "0"); + + if (!enc.initialize(image_width, image_height, callback_fn)) { + std::cerr << "failed to initialize encoder!" << std::endl; + exit (-1); + } + sensor_msgs::msg::Image image; + image.encoding = "bgr8"; + image.width = image_width; + image.height = image_height; + image.step = image.width * 3; // 3 bytes per pixel + image.header.frame_id = "frame_id"; + image.is_bigendian = false; + + for (int64_t i = 0; i < numFrames; i++) { + image.header.stamp = rclcpp::Time(i + 1, RCL_SYSTEM_TIME); + image.data.resize(image.step * image.height, static_cast(i)); + enc.encodeImage(image); // may produce callback + } + enc.flush(); // may produce callback +} + ``` + */ class Encoder { public: - using Lock = std::unique_lock; + /** + * \brief defines callback function signature. + * + * The encoder will call this function when an encoded packet is ready. 
+ * \param frame_id the frame_id of the message that produced the packet + * \param stamp the ROS time stamp of the message that produced the packet + * \param codec the codec used for compression: h264, hevc + * \param width image width + * \param height image height + * \param pts libav presentation time stamp (pts) + * \param data pointer to encoded packet + * \param sz size of encoded packet + */ using Callback = std::function; + /** + * \brief Constructor. Doesn't open codec or do anything else dangerous. + */ Encoder(); + /** + * \brief Destructor. Clears all state and frees all memory. + */ ~Encoder(); - // ------- various encoding settings - void setEncoder(const std::string & n); - - void setProfile(const std::string & p) - { - Lock lock(mutex_); - profile_ = p; - } - void setPreset(const std::string & p) + /** + * \brief sets the name of libav encoder to be used + * \param encoder name of the libav encoder, e.g. libx264, hevc_nvenc etc + */ + void setEncoder(const std::string & encoder_name); + /** + * \brief adds AVOption setting to list of options to be applied before opening the encoder + * \param key name of AVOption to set, e.g. "preset" + * \param value value of AVOption e.g. "slow" + */ + void addAVOption(const std::string & key, const std::string & value) { Lock lock(mutex_); - preset_ = p; + avOptions_.push_back({key, value}); } - void setTune(const std::string & p) + + /** + * This is the format of the image that will be fed into the encoder. + * In the README, this is referred to as av_source_pixel_format. + * + * If your ROS message format (cv_bridge_target_format) is different, + * the image will first be converted to the format you set here. + * Must be set before calling initialize(), or else the encoder + * will try to pick some suitable format. See README for more + * details about formats. + * + * \param fmt The pixel format in libav naming convention, e.g. "yuv420p" + */ + void setAVSourcePixelFormat(const std::string & fmt) { Lock lock(mutex_); - tune_ = p; + pixFormat_ = pixelFormat(fmt); } - void setPixelFormat(const std::string & p) + /** + * \brief Set cv_bridge_target_format for first image conversion + * + * Sets the target format of the first image conversion done via + * cv_bridge (cv_bridge_target_format). See explanation in the + * README file. Must be set before calling initialize(), or else + * it will default to "bgr8". The cv_bridge_target_format must + * be compatible with some AV_PIX_FMT* type, or else an exception + * will be thrown when calling initialize(). + * + * \param fmt cv_bridge_target_format (see ROS image_encodings.hpp header) + */ + void setCVBridgeTargetFormat(const std::string & fmt) { Lock lock(mutex_); - pixFormat_ = pixelFormat(p); + cvBridgeTargetFormat_ = fmt; } - void setDelay(const std::string & p) + + /** + * \brief Sets q_max parameter. See ffmpeg docs for explanation. + * + * \param q q_max parameter + */ + void setQMax(int q) { Lock lock(mutex_); - delay_ = p; + qmax_ = q; } - void setCRF(const std::string & c) + /** + * \brief Sets bitrate parameter. See ffmpeg docs for explanation. + * \param bitrate bitrate parameter (in bits/s) + */ + void setBitRate(int bitrate) { Lock lock(mutex_); - crf_ = c; + bitRate_ = bitrate; } - void setQMax(int q) + /** + * \brief gets current bitrate parameter. + * \return the bitrate in bits/sec + */ + int getBitRate() const { Lock lock(mutex_); - qmax_ = q; + return (bitRate_); } - void setBitRate(int r) + + /** + * \brief Sets gop size (max distance between keyframes). 
See ffmpeg docs. + * \param gop_size max distance between keyframes + */ + void setGOPSize(int g) { Lock lock(mutex_); - bitRate_ = r; + GOPSize_ = g; } + /** + * \brief gets current gop size. + * \return the gop_size (max distance between keyframes) + */ int getGOPSize() const { Lock lock(mutex_); return (GOPSize_); } - void setGOPSize(int g) + /** + * \brief sets maximum number of b-frames. See ffmpeg docs. + * \param b max number of b-frames. + */ + void setMaxBFrames(int b) { Lock lock(mutex_); - GOPSize_ = g; + maxBFrames_ = b; } + /** + * \brief gets maximum number of b-frames. See ffmpeg docs. + * \return max number of b-frames. + */ int getMaxBFrames() const { Lock lock(mutex_); return (maxBFrames_); } - void setMaxBFrames(int b) - { - Lock lock(mutex_); - maxBFrames_ = b; - } + /** + * \brief Sets encoding frame rate as a rational number. + * + * Not sure what it really does. + * + * \param frames the number of frames during the interval + * \param second the time interval (integer seconds) + */ void setFrameRate(int frames, int second) { Lock lock(mutex_); @@ -120,50 +241,120 @@ class Encoder timeBase_.num = second; timeBase_.den = frames; } + /** + * \brief enables or disables performance measurements. Poorly tested, hardly used. + * + * \param p true enables performance statistics + */ void setMeasurePerformance(bool p) { Lock lock(mutex_); measurePerformance_ = p; } - // ------- teardown and startup + // ------- related to teardown and startup + /** + * \brief test if encoder is initialized + * \return true if encoder is initialized + */ bool isInitialized() const { Lock lock(mutex_); return (codecContext_ != NULL); } + /** + * \brief initializes encoder. + * \param width image width + * \param height image height + * \param callback the function to call for handling encoded packets + */ bool initialize(int width, int height, Callback callback); + /** + * \brief sets ROS logger to use for info/error messages + * \param logger the logger to use for messages + */ void setLogger(rclcpp::Logger logger) { logger_ = logger; } + /** + * \brief completely resets the state of the encoder. + */ void reset(); - // encode image - void encodeImage(const cv::Mat & img, const Header & header, const rclcpp::Time & t0); void encodeImage(const Image & msg); - /**! - * flush all packets (produces callbacks). - * \param frame_id the frame id that will be presented on callback + /** + * \brief flush all packets (produces callbacks). */ - void flush(const std::string & frame_id); - /**! - * flush all packets (produces callbacks). - * \deprecated Only header.frame_id is used. Used flush(frame_id) now. - */ - void flush(const Header & header); - - /**! - * finds the encoding for a given encoder, i.e. returns h264 for h264_vaapi + void flush(); + /** + * \brief finds the codec for a given encoder, i.e. returns h264 for h264_vaapi + * \param encoder name of libav encoder + * \return codec (libav naming convention) */ - static std::string findEncoding(const std::string & encoder); + static std::string findCodec(const std::string & encoder); // ------- performance statistics void printTimers(const std::string & prefix) const; void resetTimers(); + // ------- deprecated functions + /** + * \deprecated use addAVOption("profile", "value") now. + */ + [[deprecated("use addAVOption() instead.")]] void setProfile(const std::string & p) + { + addAVOption("profile", p); + } + /** + * \deprecated use addAVOption("preset", "value") now. 
+ */ + [[deprecated("use addAVOption() instead.")]] void setPreset(const std::string & p) + { + addAVOption("preset", p); + } + /** + * \deprecated use addAVOption("tune", "value") now. + */ + [[deprecated("use addAVOption() instead.")]] void setTune(const std::string & p) + { + addAVOption("tune", p); + } + /** + * \deprecated use addAVOption("delay", "value") now. + */ + [[deprecated("use addAVOption() instead.")]] void setDelay(const std::string & p) + { + addAVOption("delay", p); + } + /** + * \deprecated use addAVOption("crf", "value") now. + */ + [[deprecated("use addAVOption() instead.")]] void setCRF(const std::string & c) + { + addAVOption("crf", c); + } + /** + * \deprecated Use setAVSourcePixelFormat() + */ + [[deprecated("use setAVSourcePixelFormat(fmt) instead.")]] void setPixelFormat( + const std::string & fmt) + { + setAVSourcePixelFormat(fmt); + } + /** + * \deprecated use encodeImage(const Image &msg) instead. + */ + [[deprecated("use encodeImage(const Image &msg) instead.")]] void encodeImage( + const cv::Mat & img, const Header & header, const rclcpp::Time & t0); + /** + * flush all packets (produces callbacks). + * \deprecated Only header.frame_id is used. Used flush(frame_id) now. + */ + [[deprecated("use flush() instead.")]] void flush(const Header & header); private: - using PTSMap = std::unordered_map; + using Lock = std::unique_lock; bool openCodec(int width, int height); void doOpenCodec(int width, int height); void closeCodec(); - int drainPacket(const std::string & frame_id, int width, int height); + void doEncodeImage(const cv::Mat & img, const Header & header, const rclcpp::Time & t0); + int drainPacket(int width, int height); AVPixelFormat pixelFormat(const std::string & f) const; void openHardwareDevice( const AVCodec * codec, enum AVHWDeviceType hwDevType, int width, int height); @@ -173,19 +364,16 @@ class Encoder mutable std::recursive_mutex mutex_; Callback callback_; // config - std::string encoder_; // e.g. "libx264" - std::string encoding_; // e.g. "h264" - std::string preset_; // e.g. "slow", "medium", "lossless" - std::string profile_; // e.g. "main", "high", "rext" - std::string tune_; // e.g. "tune" - std::string delay_; // default is 4 frames for parallel processing. 0 is lowest latency - std::string crf_; // constant rate factor. 0 is lossless, 51 is worst quality - int qmax_{-1}; // max allowed quantization. The lower the better quality - int GOPSize_{-1}; // distance between two keyframes - int maxBFrames_{-1}; // maximum number of b-frames - int64_t bitRate_{0}; // max rate in bits/s + std::string encoder_; // e.g. "libx264" + std::string codec_; // e.g. "h264" + int qmax_{-1}; // max allowed quantization. 
The lower the better quality + int GOPSize_{-1}; // distance between two keyframes + int maxBFrames_{-1}; // maximum number of b-frames + int64_t bitRate_{0}; // max rate in bits/s + std::vector> avOptions_; AVPixelFormat pixFormat_{AV_PIX_FMT_NONE}; + std::string cvBridgeTargetFormat_ = "bgr8"; AVRational timeBase_{1, 100}; AVRational frameRate_{100, 1}; bool usesHardwareFrames_{false}; @@ -201,7 +389,7 @@ class Encoder // ---------- other stuff int64_t pts_{0}; PTSMap ptsToStamp_; - // performance analysis + // ---------- performance analysis bool measurePerformance_{true}; int64_t totalInBytes_{0}; int64_t totalOutBytes_{0}; diff --git a/include/ffmpeg_encoder_decoder/pts_map.hpp b/include/ffmpeg_encoder_decoder/pts_map.hpp new file mode 100644 index 0000000..9d5a18e --- /dev/null +++ b/include/ffmpeg_encoder_decoder/pts_map.hpp @@ -0,0 +1,35 @@ +// -*-c++-*--------------------------------------------------------------------------------------- +// Copyright 2025 Bernd Pfrommer +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef FFMPEG_ENCODER_DECODER__PTS_MAP_HPP_ +#define FFMPEG_ENCODER_DECODER__PTS_MAP_HPP_ + +#include +#include +#include +#include + +namespace ffmpeg_encoder_decoder +{ +struct PTSMapEntry +{ + PTSMapEntry(const rclcpp::Time & t, const std::string & f) : time(t), frame_id(f) {} + rclcpp::Time time; + std::string frame_id; +}; + +using PTSMap = std::unordered_map; +} // namespace ffmpeg_encoder_decoder +#endif // FFMPEG_ENCODER_DECODER__PTS_MAP_HPP_ diff --git a/include/ffmpeg_encoder_decoder/utils.hpp b/include/ffmpeg_encoder_decoder/utils.hpp index ff11ba4..1b96825 100644 --- a/include/ffmpeg_encoder_decoder/utils.hpp +++ b/include/ffmpeg_encoder_decoder/utils.hpp @@ -29,64 +29,113 @@ namespace ffmpeg_encoder_decoder { namespace utils { -/**! -* convert av pixel format to clear text string -*/ +/** + * \brief Convert av pixel format to clear text string. + * \return string with name of pixel format + */ std::string pix(const AVPixelFormat & f); -/**! -* convert av error number to string -*/ + +/** + * \brief Convert av error number to string. + * \param errnum libav error number + * \return clear text string with error message + */ std::string err(int errnum); -/**! -* throws runtime_error() with decoded av error string -*/ + +/** + * \brief throws runtime_error() with decoded av error string + * \param msg message (free form text) + * \param errnum libav error number + */ void throw_err(const std::string & msg, int errnum); -/**! -* checks for error and throws runtime_error() with av error string -*/ + +/** + * \brief checks for error and throws runtime_error() with av error string + * \param msg error message (free form) + * \param errnum libav error number + */ void check_for_err(const std::string & msg, int errnum); -/**! -* finds hardware configuration, in particular the target pixel format -* and whether the encoder uses hardware frame upload -*/ + +/** + * \brief finds hardware configuration. 
+ * + * Finds hardware configuration, in particular the target pixel format + * and whether the encoder uses hardware frame upload. + * \param usesHWFrames will be set to true if hw frames are used + * \param hwDevType the hardware device type to get the config for + * \param codec the codec to get the config for + * \return the hardware pixel format to use, or AV_PIX_FMT_NONE + */ enum AVPixelFormat find_hw_config( bool * usesHWFrames, enum AVHWDeviceType hwDevType, const AVCodec * codec); + +/** + * \brief Gets all pixel formats that can be transferred to the hardware device + * \param hwframe_ctx the hardware frame context for which transfer is intended + * \return vector of allowed pixel formats + */ std::vector get_hwframe_transfer_formats(AVBufferRef * hwframe_ctx); -/**! -* finds formats that the encoder supports. Note that for VAAPI, this will just -* return AV_PIX_FMT_VAAPI since it uses hardware frames. -*/ +/** + * \brief finds all formats that the encoder supports. + * + * Note that for VAAPI, this will just return AV_PIX_FMT_VAAPI since it uses hardware frames. + * \return vector with pixel formats supported by the codec in this context + */ std::vector get_encoder_formats( - AVCodecContext * context, const AVCodec * avctx); -/**! -* picks from a vector of formats the "best" pixel format for a given encoder -*/ + const AVCodecContext * context, const AVCodec * avctx); + +/** + * \brief gets the preferred pixel format from a list. + * \param useHWFormat if true only return hardware accelerated formats + * \param fmts vector of formats to choose from + * \return preferred pixel format + */ enum AVPixelFormat get_preferred_pixel_format( bool useHWFormat, const std::vector & fmts); -/**! -* finds the names of all available decoders for a given encoding (or encoder) -*/ +/** + * \brief finds the names of all available decoders + * for a given codec (or encoder) + * \param codec the codec / encoding to find decoders for + * \param hw_decoders (output) non-null ptr to hardware decoders + * \param sw_decoders (output) non-null ptr to software decoders + */ void find_decoders( - const std::string & encoding, std::vector * hw_decoders, + const std::string & codec, std::vector * hw_decoders, std::vector * sw_decoders); -/**! -* * get hardware device types -*/ +/** + * \brief gets list of names of all libav supported device types + * + * This is not the list of devices present on the host machine, just + * the ones that libav supports, whether they are present or not. + * \return list of all libav supported device types + */ std::vector get_hwdevice_types(); -/**! -* find encoding for given encoder -*/ -std::string find_encoding(const std::string & encoder); +/** + * \brief find codec for a given encoder + * \param encoder name of libav encoder + * \return name of codec + */ +std::string find_codec(const std::string & encoder); -/**! -* gets hardware device type for specific codec -*/ +/** + * \brief gets hardware device type for specific codec + * \param codec pointer to libav codec + * \return associated hardware device type + */ enum AVHWDeviceType find_hw_device_type(const AVCodec * codec); +/** + * \brief finds AVPixelFormat corresponding to ROS encoding + * \param ros_pix_fmt ros encoding name, e.g. "bgr8" + * \return corresponding AV pixel format + * \throws std::runtime_error exception when no match found.
+ */ +enum AVPixelFormat ros_to_av_pix_format(const std::string & ros_pix_fmt); + } // namespace utils } // namespace ffmpeg_encoder_decoder #endif // FFMPEG_ENCODER_DECODER__UTILS_HPP_ diff --git a/package.xml b/package.xml index 8a53841..700c71f 100644 --- a/package.xml +++ b/package.xml @@ -14,12 +14,13 @@ cv_bridge ffmpeg - libavdevice-dev + libavdevice-dev libopencv-imgproc-dev rclcpp sensor_msgs std_msgs + ffmpeg_image_transport_msgs ament_lint_auto ament_lint_common ament_cmake_gtest diff --git a/src/decoder.cpp b/src/decoder.cpp index dc63c0c..9c705b6 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -25,7 +25,7 @@ namespace ffmpeg_encoder_decoder { -Decoder::Decoder() : logger_(rclcpp::get_logger("Decoder")) {} +Decoder::Decoder() : logger_(rclcpp::get_logger("Decoder")) { setOutputMessageEncoding("bgr8"); } Decoder::~Decoder() { reset(); } @@ -42,15 +42,27 @@ void Decoder::reset() if (hwDeviceContext_) { av_buffer_unref(&hwDeviceContext_); } - av_free(decodedFrame_); - decodedFrame_ = NULL; + av_free(swFrame_); + swFrame_ = NULL; av_free(cpuFrame_); cpuFrame_ = NULL; - av_free(colorFrame_); - colorFrame_ = NULL; + av_free(outputFrame_); + outputFrame_ = NULL; hwPixFormat_ = AV_PIX_FMT_NONE; } +void Decoder::setOutputMessageEncoding(const std::string & output_encoding) +{ + outputMsgEncoding_ = output_encoding; + outputAVPixFormat_ = utils::ros_to_av_pix_format(output_encoding); + const AVPixFmtDescriptor * pd = av_pix_fmt_desc_get(outputAVPixFormat_); + if (!pd) { + RCLCPP_ERROR_STREAM(logger_, "cannot find pixel format descriptor for " << output_encoding); + throw(std::runtime_error("cannot find format descriptor!")); + } + bitsPerPixel_ = av_get_padded_bits_per_pixel(pd); +} + bool Decoder::initialize( const std::string & encoding, Callback callback, const std::string & decoder) { @@ -110,7 +122,13 @@ static enum AVPixelFormat find_pix_format(enum AVHWDeviceType hwDevType, const A return (AV_PIX_FMT_NONE); } -// This function is a adapted version of avcodec_default_get_format +// This function is an adapted version of avcodec_default_get_format() +// +// In the ffmpeg executable, the matching between input and output formats +// when transcoding is a complex process, with filters inserted between +// sink and source that rescale and reformat the images. This get_format() +// function tries to provide some of the functionality without lifting +// large parts of the code base from the ffmpeg tool. enum AVPixelFormat get_format(struct AVCodecContext * avctx, const enum AVPixelFormat * fmt) { @@ -149,11 +167,6 @@ enum AVPixelFormat get_format(struct AVCodecContext * avctx, const enum AVPixelF } } } - // No device or other setup, so we have to choose from things which - // don't any other external information. - - // If the last element of the list is a software format, choose it - // (this should be best software format if any exist). for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) { } @@ -185,7 +198,7 @@ enum AVPixelFormat get_format(struct AVCodecContext * avctx, const enum AVPixelF return fmt[n]; } if (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL) { -// Usable with only internal setup. + // Usable with only internal setup. 
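+          // AV_CODEC_HW_CONFIG_METHOD_INTERNAL means the codec does its own device
+          // setup, so this hardware format can be chosen without creating a device
+          // or frames context here.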
#ifdef DEBUG_PIXEL_FORMAT av_get_pix_fmt_string(buf, sizeof(buf) - 1, fmt[n]); printf("handle with internal setup %d = %s\n", fmt[n], buf); @@ -238,7 +251,6 @@ bool Decoder::initDecoder(const std::vector & decoders) bool Decoder::initSingleDecoder(const std::string & decoder) { try { - // utils::get_decoders_for_encoding(); // initialize the map const AVCodec * codec = avcodec_find_decoder_by_name(decoder.c_str()); if (!codec) { RCLCPP_ERROR_STREAM(logger_, "cannot find decoder " << decoder); @@ -279,12 +291,13 @@ bool Decoder::initSingleDecoder(const std::string & decoder) av_free(codecContext_); codecContext_ = NULL; codec = NULL; + hwPixFormat_ = AV_PIX_FMT_NONE; throw(std::runtime_error("open context failed for " + decoder)); } - decodedFrame_ = av_frame_alloc(); + swFrame_ = av_frame_alloc(); cpuFrame_ = (hwPixFormat_ == AV_PIX_FMT_NONE) ? NULL : av_frame_alloc(); - colorFrame_ = av_frame_alloc(); - colorFrame_->format = AV_PIX_FMT_BGR24; + outputFrame_ = av_frame_alloc(); + outputFrame_->format = outputAVPixFormat_; } catch (const std::runtime_error & e) { RCLCPP_ERROR_STREAM(logger_, e.what()); reset(); @@ -294,85 +307,128 @@ bool Decoder::initSingleDecoder(const std::string & decoder) return (true); } -bool Decoder::decodePacket( - const std::string & encoding, const uint8_t * data, size_t size, uint64_t pts, - const std::string & frame_id, const rclcpp::Time & stamp) +bool Decoder::flush() { - rclcpp::Time t0; - if (measurePerformance_) { - t0 = rclcpp::Clock().now(); - } - if (encoding != encoding_) { - RCLCPP_ERROR_STREAM( - logger_, "no on-the fly encoding change from " << encoding_ << " to " << encoding); + if (!codecContext_) { return (false); } - AVCodecContext * ctx = codecContext_; - AVPacket * packet = av_packet_alloc(); - av_new_packet(packet, size); // will add some padding! - memcpy(packet->data, data, size); - packet->pts = pts; - packet->dts = packet->pts; - ptsToStamp_[packet->pts] = stamp; - int ret = avcodec_send_packet(ctx, packet); + int ret = avcodec_send_packet(codecContext_, nullptr); if (ret != 0) { - RCLCPP_WARN_STREAM(logger_, "send_packet failed for pts: " << pts); - av_packet_unref(packet); + RCLCPP_WARN_STREAM(logger_, "failed to send flush packet!"); return (false); } - ret = avcodec_receive_frame(ctx, decodedFrame_); - const bool isAcc = (ret == 0) && (decodedFrame_->format == hwPixFormat_); + // receive frames until decoder buffer is empty or error occurs. + // this may trigger frame callbacks. + while ((ret = receiveFrame()) == 0) { + // will return 0, EAGAIN, EOF, or other error + } + if (ret != AVERROR_EOF) { + RCLCPP_WARN_STREAM(logger_, "decoder flush failed with error"); + return (false); + } + return (true); +} + +int Decoder::receiveFrame() +{ + auto & ctx = codecContext_; // shorthand + const int ret = avcodec_receive_frame(ctx, swFrame_); + if (ret != 0) { + return (ret); + } + const bool isAcc = swFrame_->format == hwPixFormat_; if (isAcc) { - ret = av_hwframe_transfer_data(cpuFrame_, decodedFrame_, 0); - if (ret < 0) { - RCLCPP_WARN_STREAM(logger_, "failed to transfer data from GPU->CPU"); - av_packet_unref(packet); - return (false); + int ret2 = av_hwframe_transfer_data(cpuFrame_, swFrame_, 0); + if (ret2 < 0) { + RCLCPP_WARN_STREAM( + logger_, "hardware frame transfer failed for pixel format " << utils::pix(hwPixFormat_)); + return (AVERROR_INVALIDDATA); } } - AVFrame * frame = isAcc ? cpuFrame_ : decodedFrame_; + AVFrame * frame = isAcc ? 
cpuFrame_ : swFrame_; - if (ret == 0 && frame->width != 0) { + if (frame->width != 0) { // convert image to something palatable if (!swsContext_) { swsContext_ = sws_getContext( - ctx->width, ctx->height, (AVPixelFormat)frame->format, // src - ctx->width, ctx->height, (AVPixelFormat)colorFrame_->format, // dest + ctx->width, ctx->height, (AVPixelFormat)frame->format, // src + ctx->width, ctx->height, (AVPixelFormat)outputFrame_->format, // dest SWS_FAST_BILINEAR | SWS_ACCURATE_RND, NULL, NULL, NULL); if (!swsContext_) { RCLCPP_ERROR(logger_, "cannot allocate sws context!!!!"); - return (false); + return (AVERROR_BUFFER_TOO_SMALL); } } // prepare the decoded message ImagePtr image(new Image()); image->height = frame->height; image->width = frame->width; - image->step = image->width * 3; // 3 bytes per pixel - image->encoding = sensor_msgs::image_encodings::BGR8; + image->step = (image->width * bitsPerPixel_) / 8; // is this correct for weird formats? + image->encoding = outputMsgEncoding_; image->data.resize(image->step * image->height); - // bend the memory pointers in colorFrame to the right locations + // bend the memory pointers in outputFrame_ to the right locations av_image_fill_arrays( - colorFrame_->data, colorFrame_->linesize, &(image->data[0]), - (AVPixelFormat)colorFrame_->format, frame->width, frame->height, 1); + outputFrame_->data, outputFrame_->linesize, &(image->data[0]), + static_cast(outputFrame_->format), frame->width, frame->height, 1); sws_scale( - swsContext_, frame->data, frame->linesize, 0, // src - ctx->height, colorFrame_->data, colorFrame_->linesize); // dest - auto it = ptsToStamp_.find(decodedFrame_->pts); + swsContext_, frame->data, frame->linesize, 0, // src + ctx->height, outputFrame_->data, outputFrame_->linesize); // dest + auto it = ptsToStamp_.find(swFrame_->pts); if (it == ptsToStamp_.end()) { - RCLCPP_ERROR_STREAM(logger_, "cannot find pts that matches " << decodedFrame_->pts); + RCLCPP_ERROR_STREAM(logger_, "cannot find pts that matches " << swFrame_->pts); } else { - image->header.frame_id = frame_id; - image->header.stamp = it->second; + image->header.frame_id = it->second.frame_id; + image->header.stamp = it->second.time; ptsToStamp_.erase(it); #ifdef USE_AV_FLAGS - callback_(image, decodedFrame_->flags || AV_FRAME_FLAG_KEY); // deliver callback + callback_(image, swFrame_->flags || AV_FRAME_FLAG_KEY); // deliver callback #else - callback_(image, decodedFrame_->key_frame); // deliver callback + callback_(image, swFrame_->key_frame); // deliver callback #endif } } + return (ret); +} + +bool Decoder::decodePacket( + const std::string & encoding, const uint8_t * data, size_t size, uint64_t pts, + const std::string & frame_id, const rclcpp::Time & stamp) +{ + if (!isInitialized()) { + RCLCPP_ERROR_STREAM(logger_, "decoder is not initialized!"); + return (false); + } + rclcpp::Time t0; + if (measurePerformance_) { + t0 = rclcpp::Clock().now(); + } + if (encoding != encoding_) { + RCLCPP_ERROR_STREAM( + logger_, "no on-the fly encoding change from " << encoding_ << " to " << encoding); + return (false); + } + AVCodecContext * ctx = codecContext_; + AVPacket * packet = av_packet_alloc(); + av_new_packet(packet, size); // will add some padding! 
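+  // copy the compressed data into the padded libav packet and remember the ROS
+  // header (stamp and frame_id) under this pts so it can be attached to the
+  // decoded frame when it comes back from the codec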
+ memcpy(packet->data, data, size); + packet->pts = pts; + packet->dts = packet->pts; + + ptsToStamp_.insert(PTSMap::value_type(packet->pts, {stamp, frame_id})); + + int ret = avcodec_send_packet(ctx, packet); + if (ret != 0) { + RCLCPP_WARN_STREAM(logger_, "send_packet failed for pts: " << pts); + av_packet_unref(packet); + return (false); + } + int rret{0}; + // return value of 0 means got frame, + // EAGAIN means there are no more frames + while ((rret = receiveFrame()) == 0) { + // keep polling for frames as long as there are any + } av_packet_unref(packet); av_packet_free(&packet); if (measurePerformance_) { @@ -380,7 +436,7 @@ bool Decoder::decodePacket( double dt = (t1 - t0).seconds(); tdiffTotal_.update(dt); } - return (true); + return (rret == AVERROR(EAGAIN)); } void Decoder::resetTimers() { tdiffTotal_.reset(); } diff --git a/src/encoder.cpp b/src/encoder.cpp index 8718d45..e53bbf2 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -47,9 +47,11 @@ void Encoder::setEncoder(const std::string & n) { Lock lock(mutex_); encoder_ = n; - encoding_ = utils::find_encoding(encoder_); + codec_ = utils::find_codec(encoder_); } +std::string Encoder::findCodec(const std::string & encoder) { return (utils::find_codec(encoder)); } + static void free_frame(AVFrame ** frame) { if (*frame) { @@ -117,7 +119,7 @@ void Encoder::openHardwareDevice( if (usesHardwareFrames_) { const auto fmts = utils::get_hwframe_transfer_formats(hw_frames_ref); - frames_ctx->sw_format = utils::get_preferred_pixel_format(true, fmts); + frames_ctx->sw_format = utils::get_preferred_pixel_format(usesHardwareFrames_, fmts); if (pixFormat_ != AV_PIX_FMT_NONE) { RCLCPP_INFO_STREAM( logger_, "user overriding software pix fmt " << utils::pix(frames_ctx->sw_format)); @@ -215,20 +217,17 @@ void Encoder::doOpenCodec(int width, int height) } else { codecContext_->pix_fmt = (pixFormat_ != AV_PIX_FMT_NONE) ? 
pixFormat_ - : utils::get_preferred_pixel_format(false, pixFmts); + : utils::get_preferred_pixel_format(!usesHardwareFrames_, pixFmts); codecContext_->sw_pix_fmt = codecContext_->pix_fmt; } - - setAVOption("profile", profile_); - setAVOption("preset", preset_); - setAVOption("tune", tune_); - setAVOption("delay", delay_); - setAVOption("crf", crf_); - RCLCPP_DEBUG( - logger_, - "codec: %10s, profile: %10s, preset: %10s," - " bit_rate: %10ld qmax: %2d", - encoder_.c_str(), profile_.c_str(), preset_.c_str(), bitRate_, qmax_); + std::stringstream ss; + for (const auto & kv : avOptions_) { + setAVOption(kv.first, kv.second); + ss << " " << kv.first << "=" << kv.second; + } + RCLCPP_INFO( + logger_, "codec: %10s, bit_rate: %10ld qmax: %2d options: %s", encoder_.c_str(), bitRate_, + qmax_, ss.str().c_str()); err = avcodec_open2(codecContext_, codec, NULL); utils::check_for_err("cannot open codec " + encoder_, err); @@ -269,13 +268,13 @@ void Encoder::doOpenCodec(int width, int height) wrapperFrame_ = av_frame_alloc(); wrapperFrame_->width = width; wrapperFrame_->height = height; - wrapperFrame_->format = AV_PIX_FMT_BGR24; + wrapperFrame_->format = utils::ros_to_av_pix_format(cvBridgeTargetFormat_); // initialize format conversion library if (!swsContext_) { swsContext_ = sws_getContext( - width, height, AV_PIX_FMT_BGR24, // src - width, height, static_cast(frame_->format), // dest + width, height, static_cast(wrapperFrame_->format), // src + width, height, static_cast(frame_->format), // dest SWS_FAST_BILINEAR | SWS_ACCURATE_RND, NULL, NULL, NULL); if (!swsContext_) { throw(std::runtime_error("cannot allocate sws context")); @@ -301,11 +300,8 @@ void Encoder::encodeImage(const Image & msg) if (measurePerformance_) { t0 = rclcpp::Clock().now(); } - // TODO(Bernd): forcing the encoding to be BGR8 is wasteful when - // the encoder supports monochrome. 
- - cv::Mat img = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8)->image; - encodeImage(img, msg.header, t0); + cv::Mat img = cv_bridge::toCvCopy(msg, cvBridgeTargetFormat_)->image; + doEncodeImage(img, msg.header, t0); if (measurePerformance_) { const auto t1 = rclcpp::Clock().now(); tdiffDebayer_.update((t1 - t0).seconds()); @@ -313,6 +309,11 @@ void Encoder::encodeImage(const Image & msg) } void Encoder::encodeImage(const cv::Mat & img, const Header & header, const rclcpp::Time & t0) +{ + doEncodeImage(img, header, t0); +} + +void Encoder::doEncodeImage(const cv::Mat & img, const Header & header, const rclcpp::Time & t0) { Lock lock(mutex_); rclcpp::Time t1, t2, t3; @@ -336,7 +337,7 @@ void Encoder::encodeImage(const cv::Mat & img, const Header & header, const rclc } frame_->pts = pts_++; // - ptsToStamp_.insert(PTSMap::value_type(frame_->pts, header.stamp)); + ptsToStamp_.insert(PTSMap::value_type(frame_->pts, {header.stamp, header.frame_id})); int ret; if (usesHardwareFrames_) { @@ -352,7 +353,7 @@ void Encoder::encodeImage(const cv::Mat & img, const Header & header, const rclc } // now drain all packets while (ret == 0) { - ret = drainPacket(header.frame_id, img.cols, img.rows); + ret = drainPacket(img.cols, img.rows); } if (measurePerformance_) { const rclcpp::Time t4 = rclcpp::Clock().now(); @@ -360,20 +361,20 @@ void Encoder::encodeImage(const cv::Mat & img, const Header & header, const rclc } } -void Encoder::flush(const std::string & frame_id) +void Encoder::flush() { if (!frame_) { return; } int ret = avcodec_send_frame(codecContext_, nullptr); while (ret == 0) { - ret = drainPacket(frame_id, frame_->width, frame_->height); + ret = drainPacket(frame_->width, frame_->height); } } -void Encoder::flush(const Header & header) { flush(header.frame_id); } +void Encoder::flush(const Header &) { flush(); } -int Encoder::drainPacket(const std::string & frame_id, int width, int height) +int Encoder::drainPacket(int width, int height) { rclcpp::Time t0, t1, t2; if (measurePerformance_) { @@ -393,7 +394,8 @@ int Encoder::drainPacket(const std::string & frame_id, int width, int height) } auto it = ptsToStamp_.find(pk.pts); if (it != ptsToStamp_.end()) { - callback_(frame_id, it->second, encoding_, width, height, pk.pts, pk.flags, pk.data, pk.size); + const auto & pe = it->second; + callback_(pe.frame_id, pe.time, codec_, width, height, pk.pts, pk.flags, pk.data, pk.size); if (measurePerformance_) { const auto t3 = rclcpp::Clock().now(); tdiffPublish_.update((t3 - t2).seconds()); diff --git a/src/utils.cpp b/src/utils.cpp index 7be4bee..c856cf5 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -19,7 +19,9 @@ #include #include #include +#include #include +#include extern "C" { #include @@ -114,7 +116,8 @@ enum AVPixelFormat get_preferred_pixel_format( return (AV_PIX_FMT_NONE); } -std::vector get_encoder_formats(AVCodecContext * context, const AVCodec * c) +std::vector get_encoder_formats( + const AVCodecContext * context, const AVCodec * c) { std::vector formats; if (c) { @@ -151,6 +154,44 @@ std::vector get_hwframe_transfer_formats(AVBufferRef * hwfra } return (formats); } + +static const std::unordered_map ros_to_av_pix_map = { + {"bayer_rggb8", AV_PIX_FMT_BAYER_RGGB8}, + {"bayer_bggr8", AV_PIX_FMT_BAYER_BGGR8}, + {"bayer_gbrg8", AV_PIX_FMT_BAYER_GBRG8}, + {"bayer_grbg8", AV_PIX_FMT_BAYER_GRBG8}, + {"bayer_rggb16", AV_PIX_FMT_BAYER_RGGB16LE}, // map to little endian :( + {"bayer_bggr16", AV_PIX_FMT_BAYER_BGGR16LE}, + {"bayer_gbrg16", AV_PIX_FMT_BAYER_GBRG16LE}, + 
{"bayer_grbg16", AV_PIX_FMT_BAYER_GRBG16LE}, + {"rgb8", AV_PIX_FMT_RGB24}, + {"rgba8", AV_PIX_FMT_RGBA}, + {"rgb16", AV_PIX_FMT_RGB48LE}, + {"rgba16", AV_PIX_FMT_RGBA64LE}, + {"bgr8", AV_PIX_FMT_BGR24}, + {"bgra8", AV_PIX_FMT_BGRA}, + {"bgr16", AV_PIX_FMT_BGR48LE}, + {"bgra16", AV_PIX_FMT_BGRA64LE}, + {"mono8", AV_PIX_FMT_GRAY8}, + {"mono16", AV_PIX_FMT_GRAY16LE}, + {"yuv422", AV_PIX_FMT_YUV422P}, // deprecated, not sure correct + {"uyvy", AV_PIX_FMT_UYVY422}, // not sure that is correct + {"yuyv", AV_PIX_FMT_YUYV422}, // not sure that is correct + {"yuv422_yuy2", AV_PIX_FMT_YUV422P16LE}, // deprecated, probably wrong + {"nv21", AV_PIX_FMT_NV21}, + {"nv24", AV_PIX_FMT_NV24}}; + +enum AVPixelFormat ros_to_av_pix_format(const std::string & ros_pix_fmt) +{ + const auto it = ros_to_av_pix_map.find(ros_pix_fmt); + if (it == ros_to_av_pix_map.end()) { + RCLCPP_ERROR_STREAM( + rclcpp::get_logger("encoder"), "no AV pixel format known for ros format " << ros_pix_fmt); + throw(std::runtime_error("no matching pixel format found for: " + ros_pix_fmt)); + } + return (it->second); +} + // find codec by name static const AVCodec * find_by_name(const std::string & name) { @@ -185,7 +226,6 @@ static void find_decoders( } // This function finds the encoding that is the target of a given encoder. -// of {id_of(hevc)} static AVCodecID find_id_for_encoder_or_encoding(const std::string & encoder) { @@ -213,7 +253,7 @@ void find_decoders( find_decoders(real_encoding, sw_decoders, false); } -std::string find_encoding(const std::string & encoder) +std::string find_codec(const std::string & encoder) { const AVCodec * c = find_by_name(encoder); if (!c) { diff --git a/test/encoder_test.cpp b/test/encoder_test.cpp index 38cca60..b74a478 100644 --- a/test/encoder_test.cpp +++ b/test/encoder_test.cpp @@ -1,5 +1,5 @@ // -*-c++-*--------------------------------------------------------------------------------------- -// Copyright 2024 Bernd Pfrommer +// Copyright 2025 Bernd Pfrommer // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,67 +16,278 @@ #include #include +#include #include +#include #include #include +#include -static const char * g_codec = "libx264"; -static const char * g_encoding = "h264"; -static const char * g_frame_id = "frame_id"; -static const int g_width = 1920; // must be mult of 64 for some codecs! 
-static const int g_height = 1080; +using namespace std::placeholders; +using sensor_msgs::msg::Image; +using EncoderCallback = ffmpeg_encoder_decoder::Encoder::Callback; +using ffmpeg_image_transport_msgs::msg::FFMPEGPacket; -static int g_frame_counter(0); +class EncoderTester +{ +public: + EncoderTester(const std::string & frame_id, const std::string & codec, uint32_t w, uint32_t h) + : expected_frame_id_(frame_id), expected_codec_(codec), expected_width_(w), expected_height_(h) + { + } + void setCallback(EncoderCallback cb) { callback_ = cb; } + const auto & getCodec() const { return (expected_codec_); } + const auto & getFrameId() const { return (expected_frame_id_); } + const auto & getWidth() const { return (expected_width_); } + const auto & getHeight() const { return (expected_height_); } + const auto & getPacketSum() const { return (packet_sum_); } + const auto & getPtsSum() const { return (pts_sum_); } + const auto & getTsSum() const { return (ts_sum_); } + const auto & getPacketCounter() const { return (packet_counter_); } + + void packetReady( + const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, + uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, size_t sz) + { + (void)flags; + (void)pts; + EXPECT_EQ(frame_id, expected_frame_id_); + EXPECT_EQ(codec, expected_codec_); + EXPECT_EQ(width, expected_width_); + EXPECT_EQ(height, expected_height_); + for (size_t i = 0; i < sz; i++) { + packet_sum_ += data[i]; + } + pts_sum_ += pts; + ts_sum_ += stamp.nanoseconds(); + packet_counter_++; + if (callback_) { + callback_(frame_id, stamp, codec, width, height, pts, flags, data, sz); + } + } + +private: + std::string expected_frame_id_; + std::string expected_codec_; + uint32_t expected_width_{0}; + uint32_t expected_height_{0}; + uint64_t packet_sum_{0}; + uint64_t pts_sum_{0}; + int64_t ts_sum_{0}; + uint64_t packet_counter_{0}; + std::unordered_map time_to_image_; + EncoderCallback callback_{nullptr}; +}; -void packetReady( - const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, - uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, size_t sz) +class DecoderTester { - (void)stamp; - (void)pts; - (void)flags; - (void)data; - (void)sz; - EXPECT_EQ(static_cast(width), g_width); - EXPECT_EQ(static_cast(height), g_height); - EXPECT_EQ(frame_id, g_frame_id); - EXPECT_EQ(codec, g_encoding); - g_frame_counter++; -} +public: + DecoderTester(const std::string & frame_id, const std::string & encoding, uint32_t w, uint32_t h) + : expected_frame_id_(frame_id), + expected_encoding_(encoding), + expected_width_(w), + expected_height_(h) + { + } + void setDecoder(const std::string & decoder) { decoder_name_ = decoder; } + const auto & getEncoding() const { return (expected_encoding_); } + const auto & getFrameId() const { return (expected_frame_id_); } + const auto & getWidth() const { return (expected_width_); } + const auto & getHeight() const { return (expected_height_); } + const auto & getImageSum() const { return (image_sum_); } + const auto & getTsSum() const { return (ts_sum_); } + const auto & getFrameCounter() const { return (frame_counter_); } + const auto & getPacketCounter() const { return (packet_counter_); } + + // entry point for the encoded packet (passthrough from EncoderTester) + void packetReady( + const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, + uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, size_t 
sz) + { + ffmpeg_image_transport_msgs::msg::FFMPEGPacket msg; + msg.header.frame_id = frame_id; + msg.header.stamp = stamp; + msg.encoding = codec; + msg.flags = flags; + msg.pts = pts; + msg.width = width; + msg.height = height; + msg.data.reserve(sz); + std::copy(data, data + sz, std::back_inserter(msg.data)); + decodePacket(msg); + } + + void addImage(const Image::ConstSharedPtr & img) + { + time_to_image_.insert({rclcpp::Time(img->header.stamp).nanoseconds(), img}); + } + + void finalize() { decoder_.flush(); } + +private: + void decodePacket(const FFMPEGPacket & msg) + { + EXPECT_EQ(msg.header.frame_id, expected_frame_id_); + EXPECT_EQ(msg.encoding, expected_encoding_); + EXPECT_EQ(msg.width, expected_width_); + EXPECT_EQ(msg.height, expected_height_); + + if (!decoder_.isInitialized()) { + decoder_.initialize( + msg.encoding, std::bind(&DecoderTester::imageCallback, this, _1, _2), decoder_name_); + } + if (!decoder_.decodePacket( + msg.encoding, &msg.data[0], msg.data.size(), msg.pts, msg.header.frame_id, + msg.header.stamp)) { + std::cerr << "error decoding packet!" << std::endl; + throw(std::runtime_error("error decoding packet!")); + } + packet_counter_++; + } + + void imageCallback(const Image::ConstSharedPtr & img, bool /* isKeyFrame */) + { + const auto stamp = rclcpp::Time(img->header.stamp).nanoseconds(); + const auto it = time_to_image_.find(stamp); + if (it == time_to_image_.end()) { + std::cerr << "cannot find image from time stamp " << stamp << std::endl; + ; + throw(std::runtime_error("image time stamp not found")); + } + const auto & orig = it->second; + EXPECT_EQ(img->header.frame_id, orig->header.frame_id); + EXPECT_EQ(img->header.stamp, orig->header.stamp); + EXPECT_EQ(img->encoding, orig->encoding); + EXPECT_EQ(img->is_bigendian, orig->is_bigendian); + EXPECT_EQ(img->width, orig->width); + EXPECT_EQ(img->height, orig->height); + EXPECT_EQ(img->step, orig->step); + EXPECT_EQ(img->data.size(), orig->data.size()); + EXPECT_EQ(img->step, img->width * 3); + EXPECT_EQ(img->step * img->height, orig->data.size()); + EXPECT_GE(img->data.size(), 1); + bool data_equal{true}; + for (size_t i = 0; i < img->data.size(); i++) { + if (img->data[i] != orig->data[i]) { + std::cout << "mismatch at " << i << ", orig: " << static_cast(orig->data[i]) + << " now: " << static_cast(img->data[i]) << std::endl; + data_equal = false; + } + } + EXPECT_TRUE(data_equal); + frame_counter_++; + } -void test_encoder(int numFrames, const std::string & codec) + std::string expected_frame_id_; + std::string expected_encoding_; + uint32_t expected_width_{0}; + uint32_t expected_height_{0}; + uint64_t image_sum_{0}; + int64_t ts_sum_{0}; + uint64_t frame_counter_{0}; + uint64_t packet_counter_{0}; + std::unordered_map time_to_image_; + std::string decoder_name_; + ffmpeg_encoder_decoder::Decoder decoder_; +}; + +void test_encoder_msg(int numFrames, const std::string & encoder, EncoderTester * tester) { ffmpeg_encoder_decoder::Encoder enc; - enc.setEncoder(codec); - enc.setProfile("main"); - enc.setPreset("slow"); + enc.setEncoder(encoder); + enc.addAVOption("profile", "main"); + enc.addAVOption("preset", "slow"); enc.setQMax(10); enc.setBitRate(8242880); enc.setGOPSize(2); enc.setFrameRate(100, 1); - if (!enc.initialize(g_width, g_height, packetReady)) { + if (!enc.initialize( + tester->getWidth(), tester->getHeight(), + std::bind(&EncoderTester::packetReady, tester, _1, _2, _3, _4, _5, _6, _7, _8, _9))) { + std::cerr << "failed to initialize encoder!" 
<< std::endl; + return; + } + Image image; + image.encoding = "bgr8"; + image.width = tester->getWidth(); + image.height = tester->getHeight(); + image.step = image.width * 3; // 3 bytes per pixel + image.header.frame_id = tester->getFrameId(); + image.is_bigendian = false; + + for (int64_t i = 0; i < numFrames; i++) { + image.header.stamp = rclcpp::Time(i + 1, RCL_SYSTEM_TIME); + image.data.resize(image.step * image.height, static_cast(i)); + enc.encodeImage(image); + } + enc.flush(); +} + +void test_encoder_decoder_msg( + int numFrames, const std::string & encoder, const std::string & decoder, + EncoderTester * enc_tester, DecoderTester * dec_tester) +{ + dec_tester->setDecoder(decoder); + ffmpeg_encoder_decoder::Encoder enc; + enc.setEncoder(encoder); + enc.addAVOption("x265-params", "lossless=1"); + enc.addAVOption("crf", "0"); // may not be needed for lossless + enc.setAVSourcePixelFormat("gray"); + enc.setCVBridgeTargetFormat("mono8"); + enc_tester->setCallback( + std::bind(&DecoderTester::packetReady, dec_tester, _1, _2, _3, _4, _5, _6, _7, _8, _9)); + + if (!enc.initialize( + enc_tester->getWidth(), enc_tester->getHeight(), + std::bind(&EncoderTester::packetReady, enc_tester, _1, _2, _3, _4, _5, _6, _7, _8, _9))) { std::cerr << "failed to initialize encoder!" << std::endl; return; } - std_msgs::msg::Header header; - header.frame_id = g_frame_id; - for (int i = 0; i < numFrames; i++) { - cv::Mat mat = cv::Mat::zeros(g_height, g_width, CV_8UC3); // clear image - cv::putText( - mat, std::to_string(i), cv::Point(mat.cols / 2, mat.rows / 2), cv::FONT_HERSHEY_COMPLEX, - 2 /* font size */, cv::Scalar(255, 0, 0) /* col */, 2 /* weight */); - const rclcpp::Time t = rclcpp::Clock().now(); - header.stamp = t; - enc.encodeImage(mat, header, t); + + for (int64_t i = 0; i < numFrames; i++) { + auto image = std::make_shared(); + image->encoding = "bgr8"; + image->width = enc_tester->getWidth(); + image->height = enc_tester->getHeight(); + image->step = image->width * 3; // 3 bytes per pixel + image->header.frame_id = enc_tester->getFrameId(); + image->is_bigendian = false; + + image->header.stamp = rclcpp::Time(i + 1, RCL_SYSTEM_TIME); + image->data.resize(image->step * image->height, static_cast(i)); + dec_tester->addImage(image); + enc.encodeImage(*image); } - enc.flush(header); + enc.flush(); + dec_tester->finalize(); +} + +TEST(ffmpeg_encoder_decoder, encoder_msg) +{ + const int n = 10; + EncoderTester tester("frame_id", "h264", 640, 480); + test_encoder_msg(n, "libx264", &tester); + // EXPECT_EQ(tester.getPacketSum(), 115852); varies between ffmpeg versions + EXPECT_EQ(tester.getTsSum(), (n * (n + 1)) / 2); + EXPECT_EQ(tester.getPtsSum(), (n * (n - 1)) / 2); + EXPECT_EQ(tester.getPacketCounter(), n); } -TEST(ffmpeg_encoder_decoder, encoder) +TEST(ffmpeg_encoder_decoder, encoder_decoder_msg) { - test_encoder(10, g_codec); - EXPECT_EQ(g_frame_counter, 10); + const int n = 10; + const int w = 640; + const int h = 480; + EncoderTester enc_tester("frame_id", "hevc", w, h); + DecoderTester dec_tester("frame_id", "hevc", w, h); + test_encoder_decoder_msg(n, "libx265", "hevc", &enc_tester, &dec_tester); + // EXPECT_EQ(enc_tester.getPacketSum(), 266674); differs with ffmpeg version + EXPECT_EQ(enc_tester.getTsSum(), (n * (n + 1)) / 2); + EXPECT_EQ(enc_tester.getPtsSum(), (n * (n - 1)) / 2); + EXPECT_EQ(enc_tester.getPacketCounter(), n); + EXPECT_EQ(dec_tester.getPacketCounter(), n); + EXPECT_EQ(dec_tester.getFrameCounter(), n); } int main(int argc, char ** argv) From 
43747b0883fff8aff9b5332e53820953992fd422 Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Mon, 21 Jul 2025 03:29:55 -0400 Subject: [PATCH 03/12] fix nvenc encoding by looking at transfer TO formats --- include/ffmpeg_encoder_decoder/utils.hpp | 3 +- src/encoder.cpp | 45 +++++++++++++++++------- src/utils.cpp | 6 ++-- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/include/ffmpeg_encoder_decoder/utils.hpp b/include/ffmpeg_encoder_decoder/utils.hpp index 1b96825..c4d81f3 100644 --- a/include/ffmpeg_encoder_decoder/utils.hpp +++ b/include/ffmpeg_encoder_decoder/utils.hpp @@ -74,7 +74,8 @@ enum AVPixelFormat find_hw_config( * \param hwframe_ctx the hardware frame context for which transfer is intended * \return vector of allowed pixel formats */ -std::vector get_hwframe_transfer_formats(AVBufferRef * hwframe_ctx); +std::vector get_hwframe_transfer_formats( + AVBufferRef * hwframe_ctx, enum AVHWFrameTransferDirection direction); /** * \brief finds all formats that the encoder supports. diff --git a/src/encoder.cpp b/src/encoder.cpp index e53bbf2..d04d99e 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -109,17 +109,36 @@ void Encoder::openHardwareDevice( int err = 0; err = av_hwdevice_ctx_create(&hwDeviceContext_, hwDevType, NULL, NULL, 0); utils::check_for_err("cannot create hw device context", err); - AVBufferRef * hw_frames_ref = av_hwframe_ctx_alloc(hwDeviceContext_); - if (!hw_frames_ref) { - throw std::runtime_error("cannot allocate hw device!"); + + AVBufferRef * hwframe_ctx_ref = av_hwframe_ctx_alloc(hwDeviceContext_); + if (!hwframe_ctx_ref) { + av_buffer_unref(&hwDeviceContext_); + hwDeviceContext_ = 0; + throw std::runtime_error("cannot allocate hwframe context!"); + } + const auto hwframe_transfer_formats = + utils::get_hwframe_transfer_formats(hwframe_ctx_ref, AV_HWFRAME_TRANSFER_DIRECTION_TO); + + if (hwframe_transfer_formats.empty()) { + // Bernd: Apparently NVENC does not need a device to be opened at all. + // But how to tell if a device should be opened in general? + // Checking if the list of TO transfer formats is empty() works to tell + // NVENC (no device required) from VAAPI (device required). + // This test may be broken in general! 
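+    // in that case release both the device and the frame context references and
+    // return early; the codec (e.g. NVENC) then does its own internal device
+    // setup when avcodec_open2() is called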
+ av_buffer_unref(&hwDeviceContext_); + hwDeviceContext_ = 0; + av_buffer_unref(&hwframe_ctx_ref); + RCLCPP_INFO_STREAM(logger_, "no need to open device for codec " << codec->name); + return; } - AVHWFramesContext * frames_ctx = reinterpret_cast(hw_frames_ref->data); + // cast the reference to a concrete pointer + AVHWFramesContext * frames_ctx = reinterpret_cast(hwframe_ctx_ref->data); frames_ctx->format = utils::find_hw_config(&usesHardwareFrames_, hwDevType, codec); if (usesHardwareFrames_) { - const auto fmts = utils::get_hwframe_transfer_formats(hw_frames_ref); - frames_ctx->sw_format = utils::get_preferred_pixel_format(usesHardwareFrames_, fmts); + frames_ctx->sw_format = + utils::get_preferred_pixel_format(usesHardwareFrames_, hwframe_transfer_formats); if (pixFormat_ != AV_PIX_FMT_NONE) { RCLCPP_INFO_STREAM( logger_, "user overriding software pix fmt " << utils::pix(frames_ctx->sw_format)); @@ -130,7 +149,7 @@ void Encoder::openHardwareDevice( logger_, "using software pixel format: " << utils::pix(frames_ctx->sw_format)); } if (frames_ctx->sw_format == AV_PIX_FMT_NONE) { - av_buffer_unref(&hw_frames_ref); + av_buffer_unref(&hwframe_ctx_ref); throw(std::runtime_error("cannot find valid sw pixel format!")); } } @@ -138,13 +157,12 @@ void Encoder::openHardwareDevice( frames_ctx->width = width; frames_ctx->height = height; frames_ctx->initial_pool_size = 20; - if ((err = av_hwframe_ctx_init(hw_frames_ref)) < 0) { - av_buffer_unref(&hw_frames_ref); + if ((err = av_hwframe_ctx_init(hwframe_ctx_ref)) < 0) { + av_buffer_unref(&hwframe_ctx_ref); utils::throw_err("failed to initialize hardware frame context", err); } - codecContext_->hw_frames_ctx = av_buffer_ref(hw_frames_ref); - - av_buffer_unref(&hw_frames_ref); + codecContext_->hw_frames_ctx = av_buffer_ref(hwframe_ctx_ref); + av_buffer_unref(&hwframe_ctx_ref); if (codecContext_->hw_frames_ctx == nullptr) { throw(std::runtime_error("hardware decoder: cannot create buffer ref!")); @@ -228,6 +246,9 @@ void Encoder::doOpenCodec(int width, int height) RCLCPP_INFO( logger_, "codec: %10s, bit_rate: %10ld qmax: %2d options: %s", encoder_.c_str(), bitRate_, qmax_, ss.str().c_str()); + RCLCPP_INFO_STREAM(logger_, "cv_bridge_target_format: " << cvBridgeTargetFormat_); + RCLCPP_INFO_STREAM(logger_, "av_source_pixel_format: " << utils::pix(codecContext_->sw_pix_fmt)); + RCLCPP_INFO_STREAM(logger_, "encoder (hw) format: " << utils::pix(codecContext_->pix_fmt)); err = avcodec_open2(codecContext_, codec, NULL); utils::check_for_err("cannot open codec " + encoder_, err); diff --git a/src/utils.cpp b/src/utils.cpp index c856cf5..e95e5d9 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -138,12 +138,12 @@ std::vector get_encoder_formats( return (formats); } -std::vector get_hwframe_transfer_formats(AVBufferRef * hwframe_ctx) +std::vector get_hwframe_transfer_formats( + AVBufferRef * hwframe_ctx, enum AVHWFrameTransferDirection direction) { std::vector formats; AVPixelFormat * fmts{nullptr}; - int ret = - av_hwframe_transfer_get_formats(hwframe_ctx, AV_HWFRAME_TRANSFER_DIRECTION_FROM, &fmts, 0); + int ret = av_hwframe_transfer_get_formats(hwframe_ctx, direction, &fmts, 0); if (ret >= 0) { for (const auto * f = fmts; *f != AV_PIX_FMT_NONE; f++) { formats.push_back(*f); From 780e5f74f16f64d1a52d03d49a290143f0e432f5 Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Mon, 21 Jul 2025 08:44:40 -0400 Subject: [PATCH 04/12] fix bug with non-accel pix fmt --- src/encoder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoder.cpp 
b/src/encoder.cpp index d04d99e..2f84673 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -235,7 +235,7 @@ void Encoder::doOpenCodec(int width, int height) } else { codecContext_->pix_fmt = (pixFormat_ != AV_PIX_FMT_NONE) ? pixFormat_ - : utils::get_preferred_pixel_format(!usesHardwareFrames_, pixFmts); + : utils::get_preferred_pixel_format(usesHardwareFrames_, pixFmts); codecContext_->sw_pix_fmt = codecContext_->pix_fmt; } std::stringstream ss; From 6c21e43f71f8619ecc6bb3856420920132797561 Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Mon, 21 Jul 2025 08:47:01 -0400 Subject: [PATCH 05/12] filter misconfigured decoders --- include/ffmpeg_encoder_decoder/decoder.hpp | 4 +++ src/decoder.cpp | 35 +++++++++++++++++----- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/include/ffmpeg_encoder_decoder/decoder.hpp b/include/ffmpeg_encoder_decoder/decoder.hpp index 57dbaf2..6f9f283 100644 --- a/include/ffmpeg_encoder_decoder/decoder.hpp +++ b/include/ffmpeg_encoder_decoder/decoder.hpp @@ -226,6 +226,10 @@ class Decoder private: bool initSingleDecoder(const std::string & decoder); bool initDecoder(const std::vector & decoders); + std::vector filterDecoders( + const std::string & encoding, const std::vector & decoders, + const std::vector & valid_decoders); + int receiveFrame(); // --------------- variables rclcpp::Logger logger_; diff --git a/src/decoder.cpp b/src/decoder.cpp index 9c705b6..a05b959 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -76,21 +76,40 @@ bool Decoder::initialize( { callback_ = callback; encoding_ = encoding; - if (decoders.empty()) { - const auto all_decoders = findDecoders(encoding); - if (all_decoders.empty()) { - RCLCPP_ERROR_STREAM(logger_, "no decoders discovered for encoding " << encoding_); - throw(std::runtime_error("no decoders discovered for encoding " + encoding_)); - } + const auto all_decoders = findDecoders(encoding); + if (all_decoders.empty()) { + RCLCPP_ERROR_STREAM(logger_, "no decoders discovered for encoding " << encoding_); + throw(std::runtime_error("no decoders discovered for encoding " + encoding_)); + } + if (decoders.empty()) { // try all libav-discovered decoders std::string decoders_str; for (const auto & decoder : all_decoders) { decoders_str += " " + decoder; } RCLCPP_INFO_STREAM(logger_, "trying discovered decoders in order:" << decoders_str); - return (initDecoder(all_decoders)); } - return (initDecoder(decoders)); + const auto good_decoders = filterDecoders(encoding, decoders, all_decoders); + if (good_decoders.empty()) { + return (false); + } + return (initDecoder(good_decoders)); +} + +std::vector Decoder::filterDecoders( + const std::string & encoding, const std::vector & decoders, + const std::vector & valid_decoders) +{ + std::vector good_decoders; + for (const auto & dec : decoders) { // filter for decoders matching codec + if (std::find(valid_decoders.begin(), valid_decoders.end(), dec) != valid_decoders.end()) { + good_decoders.push_back(dec); + } else { + RCLCPP_WARN_STREAM( + logger_, "configured decoder: " << dec << " cannot handle encoding: " << encoding); + } + } + return (good_decoders); } static AVBufferRef * hw_decoder_init( From 8f466c48c9ee8fcae70630a8f4e90313c73f1337 Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Mon, 21 Jul 2025 08:47:24 -0400 Subject: [PATCH 06/12] fix debug printout for AV_PIX_FMT_NONE --- src/utils.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/utils.cpp b/src/utils.cpp index e95e5d9..a3b6d06 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -39,6 +39,9 
@@ std::string pix(AVPixelFormat const & f) char buf[64]; buf[63] = 0; av_get_pix_fmt_string(buf, sizeof(buf) - 1, f); + if (f == AV_PIX_FMT_NONE) { + return (std::string("NONE")); + } return (std::string(buf)); } From ed21fa3e1db63bd29c5381dc5edae08b0623999d Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Sat, 26 Jul 2025 02:55:40 -0400 Subject: [PATCH 07/12] reworked tests to cover bayer --- CMakeLists.txt | 6 +- include/ffmpeg_encoder_decoder/decoder.hpp | 15 +- include/ffmpeg_encoder_decoder/encoder.hpp | 32 ++- include/ffmpeg_encoder_decoder/utils.hpp | 21 ++ src/decoder.cpp | 107 +++++--- src/encoder.cpp | 58 +++- src/utils.cpp | 57 +++- test/encoder_decoder_test.cpp | 89 ++++++ test/encoder_decoder_tester.cpp | 176 ++++++++++++ test/encoder_decoder_tester.hpp | 93 +++++++ test/encoder_test.cpp | 297 --------------------- 11 files changed, 575 insertions(+), 376 deletions(-) create mode 100644 test/encoder_decoder_test.cpp create mode 100644 test/encoder_decoder_tester.cpp create mode 100644 test/encoder_decoder_tester.hpp delete mode 100644 test/encoder_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 615b2b0..03e0860 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,12 +140,12 @@ if(BUILD_TESTING) find_package(ffmpeg_image_transport_msgs) find_package(ament_cmake_gtest REQUIRED) - ament_add_gtest(${PROJECT_NAME}_encoder_test test/encoder_test.cpp + ament_add_gtest(${PROJECT_NAME}_test test/encoder_decoder_test.cpp test/encoder_decoder_tester.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test) - target_include_directories(${PROJECT_NAME}_encoder_test PUBLIC + target_include_directories(${PROJECT_NAME}_test PUBLIC $ $) - target_link_libraries(${PROJECT_NAME}_encoder_test + target_link_libraries(${PROJECT_NAME}_test ${ffmpeg_image_transport_msgs_TARGETS} ${PROJECT_NAME}) endif() diff --git a/include/ffmpeg_encoder_decoder/decoder.hpp b/include/ffmpeg_encoder_decoder/decoder.hpp index 6f9f283..a46da24 100644 --- a/include/ffmpeg_encoder_decoder/decoder.hpp +++ b/include/ffmpeg_encoder_decoder/decoder.hpp @@ -44,7 +44,8 @@ namespace ffmpeg_encoder_decoder * Encoder class by leveraging libav, the collection of libraries used by ffmpeg. * Sample code: ``` -void imageCallback(const sensor_msgs::msg::Image::ConstSharedPtr & img, bool isKeyFrame) +void imageCallback(const sensor_msgs::msg::Image::ConstSharedPtr & img, bool isKeyFrame, + const std::string &avPixFmt) { // process decoded image here... } @@ -79,8 +80,10 @@ class Decoder * \brief callback function signature * \param img pointer to decoded image * \param isKeyFrame true if the decoded image is a keyframe + * \param avPixFormat the original libav format of the encoded picture */ - using Callback = std::function; + using Callback = std::function; /** * \brief Constructor. 
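Not part of the diff: a minimal usage sketch of the reworked decoder interface, under the assumption that setOutputMessageEncoding() is the public setter introduced by this patch; the decoder names and the forced "mono8" output encoding are illustrative only.
```
void imageCallback(
  const sensor_msgs::msg::Image::ConstSharedPtr & img, bool isKeyFrame,
  const std::string & avPixFmt)
{
  (void)isKeyFrame;
  // avPixFmt reports the decoder's native libav pixel format, e.g. "nv12"
  std::cout << img->width << "x" << img->height << " (" << avPixFmt << ")" << std::endl;
}

ffmpeg_encoder_decoder::Decoder dec;
dec.setOutputMessageEncoding("mono8");  // optional; otherwise derived from the packet encoding
// try the listed decoders sequentially, hardware decoder listed first
if (!dec.initialize("hevc", imageCallback, std::vector<std::string>{"hevc_cuvid", "hevc"})) {
  std::cerr << "no usable hevc decoder found" << std::endl;
}
```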
@@ -229,8 +232,9 @@ class Decoder std::vector filterDecoders( const std::string & encoding, const std::vector & decoders, const std::vector & valid_decoders); - int receiveFrame(); + int convertFrameToMessage(const AVFrame * frame, const ImagePtr & image); + // --------------- variables rclcpp::Logger logger_; Callback callback_; @@ -240,7 +244,8 @@ class Decoder TDiff tdiffTotal_; // --- libav related variables AVRational timeBase_{1, 100}; - std::string encoding_; + std::string packetEncoding_; + std::string origEncoding_; AVCodecContext * codecContext_{NULL}; AVFrame * swFrame_{NULL}; AVFrame * cpuFrame_{NULL}; @@ -248,8 +253,6 @@ class Decoder SwsContext * swsContext_{NULL}; enum AVPixelFormat hwPixFormat_; std::string outputMsgEncoding_; - enum AVPixelFormat outputAVPixFormat_ { AV_PIX_FMT_NONE }; - int bitsPerPixel_; // output format bits/pixel including padding AVPacket packet_; AVBufferRef * hwDeviceContext_{NULL}; }; diff --git a/include/ffmpeg_encoder_decoder/encoder.hpp b/include/ffmpeg_encoder_decoder/encoder.hpp index bacaee3..ff2acac 100644 --- a/include/ffmpeg_encoder_decoder/encoder.hpp +++ b/include/ffmpeg_encoder_decoder/encoder.hpp @@ -60,10 +60,6 @@ namespace ffmpeg_encoder_decoder enc.setAVOption("x265-params", "lossless=1"); enc.setAVOption("crf", "0"); - if (!enc.initialize(image_width, image_height, callback_fn)) { - std::cerr << "failed to initialize encoder!" << std::endl; - exit (-1); - } sensor_msgs::msg::Image image; image.encoding = "bgr8"; image.width = image_width; @@ -72,6 +68,11 @@ namespace ffmpeg_encoder_decoder image.header.frame_id = "frame_id"; image.is_bigendian = false; + if (!enc.initialize(image.width, image.height, callback_fn, image.encoding)) { + std::cerr << "failed to initialize encoder!" << std::endl; + exit (-1); + } + for (int64_t i = 0; i < numFrames; i++) { image.header.stamp = rclcpp::Time(i + 1, RCL_SYSTEM_TIME); image.data.resize(image.step * image.height, static_cast(i)); @@ -266,8 +267,9 @@ class Encoder * \param width image width * \param height image height * \param callback the function to call for handling encoded packets + * \param encoding the ros encoding string, e.g. bayer_rggb8, rgb8 ... */ - bool initialize(int width, int height, Callback callback); + bool initialize(int width, int height, Callback callback, const std::string & encoding); /** * \brief sets ROS logger to use for info/error messages * \param logger the logger to use for messages @@ -350,8 +352,8 @@ class Encoder private: using Lock = std::unique_lock; - bool openCodec(int width, int height); - void doOpenCodec(int width, int height); + bool openCodec(int width, int height, const std::string & encoding); + void doOpenCodec(int width, int height, const std::string & encoding); void closeCodec(); void doEncodeImage(const cv::Mat & img, const Header & header, const rclcpp::Time & t0); int drainPacket(int width, int height); @@ -359,17 +361,21 @@ class Encoder void openHardwareDevice( const AVCodec * codec, enum AVHWDeviceType hwDevType, int width, int height); void setAVOption(const std::string & field, const std::string & value); + enum AVPixelFormat findMatchingSourceFormat( + const std::string & rosSrcFormat, enum AVPixelFormat targetFormat); + // --------- variables rclcpp::Logger logger_; mutable std::recursive_mutex mutex_; Callback callback_; // config - std::string encoder_; // e.g. "libx264" - std::string codec_; // e.g. "h264" - int qmax_{-1}; // max allowed quantization. 
The lower the better quality - int GOPSize_{-1}; // distance between two keyframes - int maxBFrames_{-1}; // maximum number of b-frames - int64_t bitRate_{0}; // max rate in bits/s + std::string encoder_; // e.g. "libx264" + std::string codec_; // e.g. "h264" + std::string encoding_; // e.g. "h264/rgb8" + int qmax_{-1}; // max allowed quantization. The lower the better quality + int GOPSize_{-1}; // distance between two keyframes + int maxBFrames_{-1}; // maximum number of b-frames + int64_t bitRate_{0}; // max rate in bits/s std::vector> avOptions_; AVPixelFormat pixFormat_{AV_PIX_FMT_NONE}; diff --git a/include/ffmpeg_encoder_decoder/utils.hpp b/include/ffmpeg_encoder_decoder/utils.hpp index c4d81f3..da1cf31 100644 --- a/include/ffmpeg_encoder_decoder/utils.hpp +++ b/include/ffmpeg_encoder_decoder/utils.hpp @@ -35,6 +35,12 @@ namespace utils */ std::string pix(const AVPixelFormat & f); +/** + * \brief Long descriptor of av pixel format + * \return string with long name of pixel format + */ +std::string pix_long(AVPixelFormat const & f); + /** * \brief Convert av error number to string. * \param errnum libav error number @@ -137,6 +143,21 @@ enum AVHWDeviceType find_hw_device_type(const AVCodec * codec); */ enum AVPixelFormat ros_to_av_pix_format(const std::string & ros_pix_fmt); +/** + * \brief splits string by character + * \param str_list character-separated list of tokens + * \param sep separator character + * \return vector of separated strings + */ +std::vector split_by_char(const std::string & str_list, const char sep); + +/** + * \brief determine if given ROS single-channel encoding fits into libav color format + * \param encoding ROS message encoding, e.g. bayer_rggb8 + * \param fmt libav color format to test for + * \return true if ROS encoding is single channel and color format is like NV12/yuv420p + */ +bool encode_single_channel_as_color(const std::string & encoding, enum AVPixelFormat fmt); } // namespace utils } // namespace ffmpeg_encoder_decoder #endif // FFMPEG_ENCODER_DECODER__UTILS_HPP_ diff --git a/src/decoder.cpp b/src/decoder.cpp index a05b959..793bc16 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -25,7 +25,7 @@ namespace ffmpeg_encoder_decoder { -Decoder::Decoder() : logger_(rclcpp::get_logger("Decoder")) { setOutputMessageEncoding("bgr8"); } +Decoder::Decoder() : logger_(rclcpp::get_logger("Decoder")) {} Decoder::~Decoder() { reset(); } @@ -49,18 +49,13 @@ void Decoder::reset() av_free(outputFrame_); outputFrame_ = NULL; hwPixFormat_ = AV_PIX_FMT_NONE; + outputMsgEncoding_ = ""; } void Decoder::setOutputMessageEncoding(const std::string & output_encoding) { + RCLCPP_INFO_STREAM(logger_, "forcing output encoding: " << output_encoding); outputMsgEncoding_ = output_encoding; - outputAVPixFormat_ = utils::ros_to_av_pix_format(output_encoding); - const AVPixFmtDescriptor * pd = av_pix_fmt_desc_get(outputAVPixFormat_); - if (!pd) { - RCLCPP_ERROR_STREAM(logger_, "cannot find pixel format descriptor for " << output_encoding); - throw(std::runtime_error("cannot find format descriptor!")); - } - bitsPerPixel_ = av_get_padded_bits_per_pixel(pd); } bool Decoder::initialize( @@ -75,11 +70,19 @@ bool Decoder::initialize( const std::string & encoding, Callback callback, const std::vector & decoders) { callback_ = callback; - encoding_ = encoding; - const auto all_decoders = findDecoders(encoding); + packetEncoding_ = encoding; + const auto split = utils::split_by_char(encoding, '/'); + if (outputMsgEncoding_.empty()) { + // assume orig was bgr8 + outputMsgEncoding_ = 
split.size() > 1 ? split[1] : "bgr8"; + RCLCPP_INFO_STREAM( + logger_, + "output image encoding: " << outputMsgEncoding_ << ((split.size() > 1) ? "" : " (default)")); + } + const auto all_decoders = findDecoders(split[0]); if (all_decoders.empty()) { - RCLCPP_ERROR_STREAM(logger_, "no decoders discovered for encoding " << encoding_); - throw(std::runtime_error("no decoders discovered for encoding " + encoding_)); + RCLCPP_ERROR_STREAM(logger_, "no decoders discovered for code:c " << split[0]); + throw(std::runtime_error("no decoders discovered for codec: " + split[0])); } if (decoders.empty()) { // try all libav-discovered decoders std::string decoders_str; @@ -316,7 +319,6 @@ bool Decoder::initSingleDecoder(const std::string & decoder) swFrame_ = av_frame_alloc(); cpuFrame_ = (hwPixFormat_ == AV_PIX_FMT_NONE) ? NULL : av_frame_alloc(); outputFrame_ = av_frame_alloc(); - outputFrame_->format = outputAVPixFormat_; } catch (const std::runtime_error & e) { RCLCPP_ERROR_STREAM(logger_, e.what()); reset(); @@ -366,33 +368,19 @@ int Decoder::receiveFrame() } AVFrame * frame = isAcc ? cpuFrame_ : swFrame_; - if (frame->width != 0) { - // convert image to something palatable - if (!swsContext_) { - swsContext_ = sws_getContext( - ctx->width, ctx->height, (AVPixelFormat)frame->format, // src - ctx->width, ctx->height, (AVPixelFormat)outputFrame_->format, // dest - SWS_FAST_BILINEAR | SWS_ACCURATE_RND, NULL, NULL, NULL); - if (!swsContext_) { - RCLCPP_ERROR(logger_, "cannot allocate sws context!!!!"); - return (AVERROR_BUFFER_TOO_SMALL); - } - } + if (frame->width == ctx->width && frame->height == ctx->height) { // prepare the decoded message ImagePtr image(new Image()); image->height = frame->height; image->width = frame->width; - image->step = (image->width * bitsPerPixel_) / 8; // is this correct for weird formats? 
+ image->step = (sensor_msgs::image_encodings::bitDepth(outputMsgEncoding_) / 8) * image->width * + sensor_msgs::image_encodings::numChannels(outputMsgEncoding_); image->encoding = outputMsgEncoding_; - image->data.resize(image->step * image->height); - - // bend the memory pointers in outputFrame_ to the right locations - av_image_fill_arrays( - outputFrame_->data, outputFrame_->linesize, &(image->data[0]), - static_cast(outputFrame_->format), frame->width, frame->height, 1); - sws_scale( - swsContext_, frame->data, frame->linesize, 0, // src - ctx->height, outputFrame_->data, outputFrame_->linesize); // dest + const int retc = convertFrameToMessage(frame, image); + if (retc != 0) { + return (retc); + } + auto it = ptsToStamp_.find(swFrame_->pts); if (it == ptsToStamp_.end()) { RCLCPP_ERROR_STREAM(logger_, "cannot find pts that matches " << swFrame_->pts); @@ -401,15 +389,48 @@ int Decoder::receiveFrame() image->header.stamp = it->second.time; ptsToStamp_.erase(it); #ifdef USE_AV_FLAGS - callback_(image, swFrame_->flags || AV_FRAME_FLAG_KEY); // deliver callback + callback_(image, swFrame_->flags, utils::pix(static_cast(frame->format))); #else - callback_(image, swFrame_->key_frame); // deliver callback + callback_(image, swFrame_->key_frame, utils::pix(static_cast(frame->format))); #endif } } return (ret); } +int Decoder::convertFrameToMessage(const AVFrame * frame, const ImagePtr & image) +{ + const auto srcFmt = static_cast(frame->format); + const bool encAsColor = utils::encode_single_channel_as_color(image->encoding, srcFmt); + if (!swsContext_) { // initialize reformatting context if first time + // If encode-mono-as-color hack has been used, leave the pixel + // format as is, and later simply crop away the bottom 1/3 of image. + outputFrame_->format = encAsColor ? srcFmt : utils::ros_to_av_pix_format(outputMsgEncoding_); + swsContext_ = sws_getContext( + frame->width, frame->height, srcFmt, // src + frame->width, frame->height, static_cast(outputFrame_->format), // dest + SWS_FAST_BILINEAR | SWS_ACCURATE_RND, NULL, NULL, NULL); + if (!swsContext_) { + RCLCPP_ERROR(logger_, "cannot allocate sws context!!!!"); + return (AVERROR_BUFFER_TOO_SMALL); + } + } + + image->data.resize( + encAsColor ? 
((image->step * image->height * 3) / 2) : (image->step * image->height)); + // bend the memory pointers in outputFrame_ to the right locations + av_image_fill_arrays( + outputFrame_->data, outputFrame_->linesize, &(image->data[0]), + static_cast(outputFrame_->format), frame->width, frame->height, 1); + sws_scale( + swsContext_, frame->data, frame->linesize, 0, // src + frame->height, outputFrame_->data, outputFrame_->linesize); // dest + + // now resize in case we encoded single-channel as colorz + image->data.resize(image->step * image->height); + return (0); +} + bool Decoder::decodePacket( const std::string & encoding, const uint8_t * data, size_t size, uint64_t pts, const std::string & frame_id, const rclcpp::Time & stamp) @@ -422,9 +443,9 @@ bool Decoder::decodePacket( if (measurePerformance_) { t0 = rclcpp::Clock().now(); } - if (encoding != encoding_) { + if (encoding != packetEncoding_) { RCLCPP_ERROR_STREAM( - logger_, "no on-the fly encoding change from " << encoding_ << " to " << encoding); + logger_, "no on-the fly encoding change from " << packetEncoding_ << " to " << encoding); return (false); } AVCodecContext * ctx = codecContext_; @@ -474,16 +495,16 @@ const std::unordered_map & Decoder::getDefaultEncoderT } void Decoder::findDecoders( - const std::string & encoding, std::vector * hw_decoders, + const std::string & codec, std::vector * hw_decoders, std::vector * sw_decoders) { - utils::find_decoders(encoding, hw_decoders, sw_decoders); + utils::find_decoders(codec, hw_decoders, sw_decoders); } -std::vector Decoder::findDecoders(const std::string & encoding) +std::vector Decoder::findDecoders(const std::string & codec) { std::vector sw_decoders, all_decoders; - utils::find_decoders(encoding, &all_decoders, &sw_decoders); + utils::find_decoders(codec, &all_decoders, &sw_decoders); all_decoders.insert(all_decoders.end(), sw_decoders.begin(), sw_decoders.end()); return (all_decoders); } diff --git a/src/encoder.cpp b/src/encoder.cpp index 2f84673..85763a1 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -96,11 +96,11 @@ AVPixelFormat Encoder::pixelFormat(const std::string & f) const return (fmt); } -bool Encoder::initialize(int width, int height, Callback callback) +bool Encoder::initialize(int width, int height, Callback callback, const std::string & encoding) { Lock lock(mutex_); callback_ = callback; - return (openCodec(width, height)); + return (openCodec(width, height, encoding)); } void Encoder::openHardwareDevice( @@ -169,10 +169,10 @@ void Encoder::openHardwareDevice( } } -bool Encoder::openCodec(int width, int height) +bool Encoder::openCodec(int width, int height, const std::string & encoding) { try { - doOpenCodec(width, height); + doOpenCodec(width, height, encoding); } catch (const std::runtime_error & e) { RCLCPP_ERROR_STREAM(logger_, e.what()); closeCodec(); @@ -183,9 +183,26 @@ bool Encoder::openCodec(int width, int height) return (true); } -void Encoder::doOpenCodec(int width, int height) +enum AVPixelFormat Encoder::findMatchingSourceFormat( + const std::string & rosSrcFormat, enum AVPixelFormat targetFormat) +{ + const auto targetFormatDesc = av_pix_fmt_desc_get(targetFormat); + if (!targetFormatDesc) { + RCLCPP_ERROR_STREAM(logger_, "invalid libav target format: " << static_cast(targetFormat)); + throw(std::runtime_error("invalid libav target format")); + } + // special hack to encode single-channel images as nv12/yuv420p etc + if (utils::encode_single_channel_as_color(rosSrcFormat, targetFormat)) { + return (targetFormat); + } + return 
(utils::ros_to_av_pix_format(rosSrcFormat)); +} + +void Encoder::doOpenCodec(int width, int height, const std::string &) { int err = 0; + encoding_ = codec_ + "/" + cvBridgeTargetFormat_; + codecContext_ = nullptr; if (encoder_.empty()) { throw(std::runtime_error("no codec set!")); @@ -246,7 +263,10 @@ void Encoder::doOpenCodec(int width, int height) RCLCPP_INFO( logger_, "codec: %10s, bit_rate: %10ld qmax: %2d options: %s", encoder_.c_str(), bitRate_, qmax_, ss.str().c_str()); - RCLCPP_INFO_STREAM(logger_, "cv_bridge_target_format: " << cvBridgeTargetFormat_); + RCLCPP_INFO_STREAM( + logger_, "cv_bridge_target_format: " + << cvBridgeTargetFormat_ + << " libav: " << utils::pix(utils::ros_to_av_pix_format(cvBridgeTargetFormat_))); RCLCPP_INFO_STREAM(logger_, "av_source_pixel_format: " << utils::pix(codecContext_->sw_pix_fmt)); RCLCPP_INFO_STREAM(logger_, "encoder (hw) format: " << utils::pix(codecContext_->pix_fmt)); @@ -285,11 +305,12 @@ void Encoder::doOpenCodec(int width, int height) packet_->data = NULL; packet_->size = 0; - // create (src) frame that wraps the received uncompressed image + // create (src) frame that wraps the received raw image wrapperFrame_ = av_frame_alloc(); wrapperFrame_->width = width; wrapperFrame_->height = height; - wrapperFrame_->format = utils::ros_to_av_pix_format(cvBridgeTargetFormat_); + wrapperFrame_->format = findMatchingSourceFormat( + cvBridgeTargetFormat_, static_cast(frame_->format)); // initialize format conversion library if (!swsContext_) { @@ -321,8 +342,23 @@ void Encoder::encodeImage(const Image & msg) if (measurePerformance_) { t0 = rclcpp::Clock().now(); } - cv::Mat img = cv_bridge::toCvCopy(msg, cvBridgeTargetFormat_)->image; - doEncodeImage(img, msg.header, t0); + try { + if (utils::encode_single_channel_as_color( + cvBridgeTargetFormat_, static_cast(wrapperFrame_->format))) { + // hack to encode single-channel ros formats as nv12/yuv420p etc + cv::Mat img( + msg.height, msg.width, cv::DataType::type, const_cast(msg.data.data()), + msg.step); + // Add empty color channels to the bottom of matrix + img.push_back(cv::Mat::zeros(msg.height / 2, msg.width, cv::DataType::type)); + doEncodeImage(img, msg.header, t0); + } else { + cv::Mat img = cv_bridge::toCvCopy(msg, cvBridgeTargetFormat_)->image; + doEncodeImage(img, msg.header, t0); + } + } catch (const cv_bridge::Exception & e) { + RCLCPP_ERROR_STREAM(logger_, "cv_bridge convert failed: " << e.what()); + } if (measurePerformance_) { const auto t1 = rclcpp::Clock().now(); tdiffDebayer_.update((t1 - t0).seconds()); @@ -416,7 +452,7 @@ int Encoder::drainPacket(int width, int height) auto it = ptsToStamp_.find(pk.pts); if (it != ptsToStamp_.end()) { const auto & pe = it->second; - callback_(pe.frame_id, pe.time, codec_, width, height, pk.pts, pk.flags, pk.data, pk.size); + callback_(pe.frame_id, pe.time, encoding_, width, height, pk.pts, pk.flags, pk.data, pk.size); if (measurePerformance_) { const auto t3 = rclcpp::Clock().now(); tdiffPublish_.update((t3 - t2).seconds()); diff --git a/src/utils.cpp b/src/utils.cpp index a3b6d06..12ede02 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,18 @@ namespace ffmpeg_encoder_decoder namespace utils { std::string pix(AVPixelFormat const & f) +{ + if (f == AV_PIX_FMT_NONE) { + return (std::string("NONE")); + } + const char * name = av_get_pix_fmt_name(f); + if (!name) { + return (std::string("UNKNOWN")); + } + return (std::string(name)); +} + +std::string 
pix_long(AVPixelFormat const & f) { char buf[64]; buf[63] = 0; @@ -105,7 +118,16 @@ enum AVPixelFormat get_preferred_pixel_format( { if (useHWFormat) { // the hardware encoders typically use nv12. - return (has_format(fmts, AV_PIX_FMT_NV12) ? AV_PIX_FMT_NV12 : AV_PIX_FMT_NONE); + if (has_format(fmts, AV_PIX_FMT_NV12)) { + return (AV_PIX_FMT_NV12); + } + if (has_format(fmts, AV_PIX_FMT_YUV420P)) { + return (AV_PIX_FMT_YUV420P); + } + if (fmts.size() > 0) { + return (fmts[0]); + } + return (AV_PIX_FMT_NONE); } if (has_format(fmts, AV_PIX_FMT_BGR24)) { return (AV_PIX_FMT_BGR24); // fastest, needs no copy @@ -182,7 +204,9 @@ static const std::unordered_map ros_to_av_pix_m {"yuyv", AV_PIX_FMT_YUYV422}, // not sure that is correct {"yuv422_yuy2", AV_PIX_FMT_YUV422P16LE}, // deprecated, probably wrong {"nv21", AV_PIX_FMT_NV21}, - {"nv24", AV_PIX_FMT_NV24}}; + {"nv24", AV_PIX_FMT_NV24}, + {"nv12", AV_PIX_FMT_NV12} // not an official ROS encoding!! +}; enum AVPixelFormat ros_to_av_pix_format(const std::string & ros_pix_fmt) { @@ -228,10 +252,24 @@ static void find_decoders( } } +std::vector split_by_char(const std::string & str_list, const char sep) +{ + std::stringstream ss(str_list); + std::vector split; + for (std::string s; ss.good();) { + getline(ss, s, sep); + if (!s.empty()) { + split.push_back(s); + } + } + return (split); +} + // This function finds the encoding that is the target of a given encoder. static AVCodecID find_id_for_encoder_or_encoding(const std::string & encoder) { + // first look for encoder with that name const AVCodec * c = find_by_name(encoder); if (!c) { const AVCodecDescriptor * desc = NULL; @@ -240,7 +278,7 @@ static AVCodecID find_id_for_encoder_or_encoding(const std::string & encoder) return (desc->id); } } - throw(std::runtime_error("unknown encoder: " + encoder)); + throw(std::runtime_error("unknown encoder/encoding: " + encoder)); } return (c->id); } @@ -271,6 +309,19 @@ std::string find_codec(const std::string & encoder) throw(std::runtime_error("weird ffmpeg config error???")); } +static bool isNV12LikeFormat(enum AVPixelFormat fmt) +{ + // any format where the top of the image is a full-resolution + // luminance image, and the bottom 1/3 holds the color channels + // should work + return (fmt == AV_PIX_FMT_NV12 || fmt == AV_PIX_FMT_YUV420P); +} + +bool encode_single_channel_as_color(const std::string & encoding, enum AVPixelFormat fmt) +{ + return (sensor_msgs::image_encodings::numChannels(encoding) == 1 && isNV12LikeFormat(fmt)); +} + std::vector get_hwdevice_types() { std::vector types; diff --git a/test/encoder_decoder_test.cpp b/test/encoder_decoder_test.cpp new file mode 100644 index 0000000..654850d --- /dev/null +++ b/test/encoder_decoder_test.cpp @@ -0,0 +1,89 @@ +// -*-c++-*--------------------------------------------------------------------------------------- +// Copyright 2025 Bernd Pfrommer +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "encoder_decoder_tester.hpp" + +TEST(ffmpeg_encoder_decoder, encoder_decoder_bgr8) +{ + const std::string orig_enc = "bgr8"; + EncoderDecoderTester tester; + // encoding options + tester.setOriginalROSEncoding(orig_enc); + tester.setEncoder("libx264rgb"); + tester.setCVBridgeTargetFormat(orig_enc); + // encoder only supports bgr0, bgr24, rgb24 + // tester.setAVSourcePixelFormat("bgr24"); // automatically picked + tester.setExpectedPacketEncoding("h264/" + orig_enc); + tester.addAVOption("crf", "0"); // needed for lossless + // decoding options + tester.setFinalROSEncoding(orig_enc); + tester.setDecoder("h264"); // codec == encoder in this case + tester.setDecoderExpectedAVPixFmt("gbrp"); // picked by decoder + tester.runTest(); + tester.check(); +} + +TEST(ffmpeg_encoder_decoder, encoder_decoder_mono) +{ + const std::string orig_enc = "mono8"; + EncoderDecoderTester tester; + // encoding options + tester.setOriginalROSEncoding(orig_enc); + tester.setEncoder("libx265"); + tester.setCVBridgeTargetFormat(orig_enc); + tester.setAVSourcePixelFormat("gray"); + tester.addAVOption("x265-params", "lossless=1"); + tester.setExpectedPacketEncoding("hevc/" + orig_enc); + tester.addAVOption("crf", "0"); // may not be needed for lossless + // decoding options + tester.setFinalROSEncoding(orig_enc); + tester.setDecoder("hevc"); + tester.setDecoderExpectedAVPixFmt("gray"); + tester.runTest(); + tester.check(); +} + +TEST(ffmpeg_encoder_decoder, encoder_decoder_bayer) +{ + // tests the special hacks required for bayer encoding + const std::string orig_enc = "bayer_rggb8"; + EncoderDecoderTester tester; + // --- encoding options + tester.setOriginalROSEncoding(orig_enc); + tester.setEncoder("libx265"); + // setting target format no conversion + tester.setCVBridgeTargetFormat(orig_enc); + // encoder will automatically pick yuv420p, + // then recognize that this requires single-channel to + // color conversion, and do this losslessly. + // tester.setAVSourcePixelFormat("yuv420p"); // not needed + tester.addAVOption("x265-params", "lossless=1"); + tester.setExpectedPacketEncoding("hevc/" + orig_enc); + // --- decoding options + tester.setFinalROSEncoding(orig_enc); + tester.setDecoder("hevc"); + tester.setDecoderExpectedAVPixFmt("yuv420p"); + tester.runTest(); + tester.check(); +} + +int main(int argc, char ** argv) +{ + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/test/encoder_decoder_tester.cpp b/test/encoder_decoder_tester.cpp new file mode 100644 index 0000000..c586683 --- /dev/null +++ b/test/encoder_decoder_tester.cpp @@ -0,0 +1,176 @@ +// -*-c++-*--------------------------------------------------------------------------------------- +// Copyright 2025 Bernd Pfrommer +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "encoder_decoder_tester.hpp" + +#include + +#include + +using namespace std::placeholders; + +sensor_msgs::msg::Image::SharedPtr EncoderDecoderTester::makeTestMessage() +{ + auto img = std::make_shared(); + img->encoding = original_ros_encoding_; + img->width = width_; + img->height = height_; + img->step = (sensor_msgs::image_encodings::bitDepth(img->encoding) / 8) * img->width * + sensor_msgs::image_encodings::numChannels(img->encoding); + img->header.frame_id = frame_id_; + img->is_bigendian = false; + return (img); +} + +int byte_depth(const std::string & enc) +{ + return ( + (sensor_msgs::image_encodings::bitDepth(enc) * sensor_msgs::image_encodings::numChannels(enc)) / + 8); +} + +void EncoderDecoderTester::runTest() +{ + enc_.setQMax(10); + enc_.setBitRate(8242880); + enc_.setGOPSize(2); + enc_.setFrameRate(100, 1); + + if (!enc_.initialize( + width_, height_, + std::bind(&EncoderDecoderTester::packetReady, this, _1, _2, _3, _4, _5, _6, _7, _8, _9), + original_ros_encoding_)) { + std::cerr << "failed to initialize encoder!" << std::endl; + return; + } + std::cout << "original encoding " << original_ros_encoding_ << " has " + << byte_depth(original_ros_encoding_) << " bytes/pixel" << std::endl; + + for (size_t i = 0; i < num_frames_; i++) { + auto img = makeTestMessage(); + img->header.stamp = rclcpp::Time(i + 1, RCL_SYSTEM_TIME); + img->data.resize(img->step * img->height, static_cast(i)); + addImage(img); + enc_.encodeImage(*img); + } + enc_.flush(); + dec_.flush(); +} + +void EncoderDecoderTester::check() +{ + const size_t n = num_frames_; + EXPECT_EQ(getTsSum(), (n * (n + 1)) / 2); + EXPECT_EQ(getPtsSum(), (n * (n - 1)) / 2); + EXPECT_EQ(getPacketCounter(), n); + EXPECT_EQ(getFrameCounter(), n); +} + +void EncoderDecoderTester::packetReady( + const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, + uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, size_t sz) +{ + pts_sum_ += pts; + ffmpeg_image_transport_msgs::msg::FFMPEGPacket msg; + msg.header.frame_id = frame_id; + msg.header.stamp = stamp; + msg.encoding = codec; + msg.flags = flags; + msg.pts = pts; + msg.width = width; + msg.height = height; + msg.data.reserve(sz); + std::copy(data, data + sz, std::back_inserter(msg.data)); + decodePacket(msg); +} + +void EncoderDecoderTester::addImage(const Image::ConstSharedPtr & img) +{ + time_to_image_.insert({rclcpp::Time(img->header.stamp).nanoseconds(), img}); +} + +void EncoderDecoderTester::decodePacket(const FFMPEGPacket & msg) +{ + EXPECT_EQ(msg.header.frame_id, frame_id_); + EXPECT_EQ(msg.encoding, expected_packet_encoding_); + EXPECT_EQ(msg.width, width_); + EXPECT_EQ(msg.height, height_); + + if (!dec_.isInitialized()) { + dec_.initialize( + msg.encoding, std::bind(&EncoderDecoderTester::imageCallback, this, _1, _2, _3), + decoder_name_); + } + if (!dec_.decodePacket( + msg.encoding, &msg.data[0], msg.data.size(), msg.pts, msg.header.frame_id, + msg.header.stamp)) { + std::cerr << "error decoding packet!" 
<< std::endl; + throw(std::runtime_error("error decoding packet!")); + } + packet_counter_++; +} + +void EncoderDecoderTester::imageCallback( + const Image::ConstSharedPtr & img, bool /* isKeyFrame */, const std::string & av_pix_fmt) +{ + const auto stamp = rclcpp::Time(img->header.stamp).nanoseconds(); + ts_sum_ += stamp; + const auto it = time_to_image_.find(stamp); + if (it == time_to_image_.end()) { + std::cerr << "cannot find image from time stamp " << stamp << std::endl; + throw(std::runtime_error("image time stamp not found")); + } + const auto & orig = it->second; + EXPECT_EQ(av_pix_fmt, decoder_av_pix_fmt_); + EXPECT_EQ(img->header.frame_id, orig->header.frame_id); + EXPECT_EQ(img->header.stamp, orig->header.stamp); + EXPECT_EQ(img->encoding, orig->encoding); + EXPECT_EQ(img->is_bigendian, orig->is_bigendian); + EXPECT_EQ(img->width, orig->width); + EXPECT_EQ(img->height, orig->height); + EXPECT_EQ(img->step, orig->step); + EXPECT_EQ(img->data.size(), orig->data.size()); + EXPECT_EQ(img->step, orig->step); + EXPECT_EQ(img->step * img->height, orig->data.size()); + EXPECT_GE(img->data.size(), 1); + const int num_channels = sensor_msgs::image_encodings::bitDepth(img->encoding) / 8; + const int num_orig_channels = sensor_msgs::image_encodings::bitDepth(orig->encoding) / 8; + if (num_channels != num_orig_channels) { + std::cerr << "num channel mismatch! orig: " << num_orig_channels << " vs now: " << num_channels + << std::endl; + throw(std::runtime_error("num channel mismatch!")); + } + bool data_equal{true}; + int last_err = 0; + for (size_t row = 0; row < img->height; row++) { + for (size_t col = 0; col < img->width; col++) { + for (int channel = 0; channel < num_channels; channel++) { + const uint8_t * p_orig = &(orig->data[row * orig->step + col * num_channels + channel]); + const uint8_t * p = &(img->data[row * orig->step + col * num_channels + channel]); + int err = static_cast(*p) - static_cast(*p_orig); + if (err && (err != last_err || col == 0 || col == img->width - 1)) { + std::cout << "mismatch image # " << rclcpp::Time(img->header.stamp).nanoseconds() + << " at line " << row << " col: " << col << " chan: " << channel + << ", orig: " << static_cast(*p_orig) << " now: " << static_cast(*p) + << std::endl; + data_equal = false; + last_err = err; + } + } + } + } + EXPECT_TRUE(data_equal); + frame_counter_++; +} diff --git a/test/encoder_decoder_tester.hpp b/test/encoder_decoder_tester.hpp new file mode 100644 index 0000000..0a1e7da --- /dev/null +++ b/test/encoder_decoder_tester.hpp @@ -0,0 +1,93 @@ +// -*-c++-*--------------------------------------------------------------------------------------- +// Copyright 2025 Bernd Pfrommer +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ENCODER_DECODER_TESTER_HPP_ +#define ENCODER_DECODER_TESTER_HPP_ +#include +#include +#include +#include +#include + +class EncoderDecoderTester +{ +public: + using Image = sensor_msgs::msg::Image; + using EncoderCallback = ffmpeg_encoder_decoder::Encoder::Callback; + using FFMPEGPacket = ffmpeg_image_transport_msgs::msg::FFMPEGPacket; + explicit EncoderDecoderTester(size_t n = 10, uint32_t width = 640, uint32_t height = 480) + : num_frames_(n), width_(width), height_(height) + { + } + // ------- encoder side options + void setNumFrames(int n) { num_frames_ = n; } + void setOriginalROSEncoding(const std::string & s) { original_ros_encoding_ = s; } + void setFinalROSEncoding(const std::string & s) { final_ros_encoding_ = s; } + void setWidth(uint32_t n) { width_ = n; } + void setHeight(uint32_t n) { height_ = n; } + void setEncoder(const std::string & n) { enc_.setEncoder(n); } + void setAVSourcePixelFormat(const std::string & f) { enc_.setAVSourcePixelFormat(f); } + void setCVBridgeTargetFormat(const std::string & f) { enc_.setCVBridgeTargetFormat(f); } + void addAVOption(const std::string & k, const std::string & v) { enc_.addAVOption(k, v); } + void setExpectedPacketEncoding(const std::string & s) { expected_packet_encoding_ = s; } + // ------- decoder side options + void setDecoder(const std::string & s) { decoder_name_ = s; } + void setDecoderExpectedAVPixFmt(const std::string & s) { decoder_av_pix_fmt_ = s; } + + // ------- getters + const auto & getImageSum() const { return (image_sum_); } + const auto & getTsSum() const { return (ts_sum_); } + const auto & getFrameCounter() const { return (frame_counter_); } + const auto & getPacketCounter() const { return (packet_counter_); } + const auto & getPtsSum() const { return (pts_sum_); } + + void runTest(); + void check(); + +private: + void packetReady( + const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, + uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, size_t sz); + void addImage(const Image::ConstSharedPtr & img); + void decodePacket(const FFMPEGPacket & msg); + void imageCallback( + const Image::ConstSharedPtr & img, bool /* isKeyFrame */, const std::string & av_pix_fmt); + sensor_msgs::msg::Image::SharedPtr makeTestMessage(); + // ----------------- variables + // --- encoder related + ffmpeg_encoder_decoder::Encoder enc_; + size_t num_frames_{10}; + std::string frame_id_{"frame_id"}; + std::string original_ros_encoding_; + std::string encoder_name_; + uint32_t width_{0}; + uint32_t height_{0}; + // --- decoder related + ffmpeg_encoder_decoder::Decoder dec_; + std::string decoder_name_; + std::string final_ros_encoding_; + std::string decoder_av_pix_fmt_; + std::string expected_packet_encoding_; + // --- statistics + uint64_t image_sum_{0}; + int64_t ts_sum_{0}; + uint64_t frame_counter_{0}; + uint64_t packet_counter_{0}; + uint64_t pts_sum_{0}; + // --- misc + std::unordered_map time_to_image_; +}; + +#endif // ENCODER_DECODER_TESTER_HPP_ diff --git a/test/encoder_test.cpp b/test/encoder_test.cpp deleted file mode 100644 index b74a478..0000000 --- a/test/encoder_test.cpp +++ /dev/null @@ -1,297 +0,0 @@ -// -*-c++-*--------------------------------------------------------------------------------------- -// Copyright 2025 Bernd Pfrommer -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include -#include -#include -#include -#include -#include - -using namespace std::placeholders; -using sensor_msgs::msg::Image; -using EncoderCallback = ffmpeg_encoder_decoder::Encoder::Callback; -using ffmpeg_image_transport_msgs::msg::FFMPEGPacket; - -class EncoderTester -{ -public: - EncoderTester(const std::string & frame_id, const std::string & codec, uint32_t w, uint32_t h) - : expected_frame_id_(frame_id), expected_codec_(codec), expected_width_(w), expected_height_(h) - { - } - void setCallback(EncoderCallback cb) { callback_ = cb; } - const auto & getCodec() const { return (expected_codec_); } - const auto & getFrameId() const { return (expected_frame_id_); } - const auto & getWidth() const { return (expected_width_); } - const auto & getHeight() const { return (expected_height_); } - const auto & getPacketSum() const { return (packet_sum_); } - const auto & getPtsSum() const { return (pts_sum_); } - const auto & getTsSum() const { return (ts_sum_); } - const auto & getPacketCounter() const { return (packet_counter_); } - - void packetReady( - const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, - uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, size_t sz) - { - (void)flags; - (void)pts; - EXPECT_EQ(frame_id, expected_frame_id_); - EXPECT_EQ(codec, expected_codec_); - EXPECT_EQ(width, expected_width_); - EXPECT_EQ(height, expected_height_); - for (size_t i = 0; i < sz; i++) { - packet_sum_ += data[i]; - } - pts_sum_ += pts; - ts_sum_ += stamp.nanoseconds(); - packet_counter_++; - if (callback_) { - callback_(frame_id, stamp, codec, width, height, pts, flags, data, sz); - } - } - -private: - std::string expected_frame_id_; - std::string expected_codec_; - uint32_t expected_width_{0}; - uint32_t expected_height_{0}; - uint64_t packet_sum_{0}; - uint64_t pts_sum_{0}; - int64_t ts_sum_{0}; - uint64_t packet_counter_{0}; - std::unordered_map time_to_image_; - EncoderCallback callback_{nullptr}; -}; - -class DecoderTester -{ -public: - DecoderTester(const std::string & frame_id, const std::string & encoding, uint32_t w, uint32_t h) - : expected_frame_id_(frame_id), - expected_encoding_(encoding), - expected_width_(w), - expected_height_(h) - { - } - void setDecoder(const std::string & decoder) { decoder_name_ = decoder; } - const auto & getEncoding() const { return (expected_encoding_); } - const auto & getFrameId() const { return (expected_frame_id_); } - const auto & getWidth() const { return (expected_width_); } - const auto & getHeight() const { return (expected_height_); } - const auto & getImageSum() const { return (image_sum_); } - const auto & getTsSum() const { return (ts_sum_); } - const auto & getFrameCounter() const { return (frame_counter_); } - const auto & getPacketCounter() const { return (packet_counter_); } - - // entry point for the encoded packet (passthrough from EncoderTester) - void packetReady( - const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, - uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, 
size_t sz) - { - ffmpeg_image_transport_msgs::msg::FFMPEGPacket msg; - msg.header.frame_id = frame_id; - msg.header.stamp = stamp; - msg.encoding = codec; - msg.flags = flags; - msg.pts = pts; - msg.width = width; - msg.height = height; - msg.data.reserve(sz); - std::copy(data, data + sz, std::back_inserter(msg.data)); - decodePacket(msg); - } - - void addImage(const Image::ConstSharedPtr & img) - { - time_to_image_.insert({rclcpp::Time(img->header.stamp).nanoseconds(), img}); - } - - void finalize() { decoder_.flush(); } - -private: - void decodePacket(const FFMPEGPacket & msg) - { - EXPECT_EQ(msg.header.frame_id, expected_frame_id_); - EXPECT_EQ(msg.encoding, expected_encoding_); - EXPECT_EQ(msg.width, expected_width_); - EXPECT_EQ(msg.height, expected_height_); - - if (!decoder_.isInitialized()) { - decoder_.initialize( - msg.encoding, std::bind(&DecoderTester::imageCallback, this, _1, _2), decoder_name_); - } - if (!decoder_.decodePacket( - msg.encoding, &msg.data[0], msg.data.size(), msg.pts, msg.header.frame_id, - msg.header.stamp)) { - std::cerr << "error decoding packet!" << std::endl; - throw(std::runtime_error("error decoding packet!")); - } - packet_counter_++; - } - - void imageCallback(const Image::ConstSharedPtr & img, bool /* isKeyFrame */) - { - const auto stamp = rclcpp::Time(img->header.stamp).nanoseconds(); - const auto it = time_to_image_.find(stamp); - if (it == time_to_image_.end()) { - std::cerr << "cannot find image from time stamp " << stamp << std::endl; - ; - throw(std::runtime_error("image time stamp not found")); - } - const auto & orig = it->second; - EXPECT_EQ(img->header.frame_id, orig->header.frame_id); - EXPECT_EQ(img->header.stamp, orig->header.stamp); - EXPECT_EQ(img->encoding, orig->encoding); - EXPECT_EQ(img->is_bigendian, orig->is_bigendian); - EXPECT_EQ(img->width, orig->width); - EXPECT_EQ(img->height, orig->height); - EXPECT_EQ(img->step, orig->step); - EXPECT_EQ(img->data.size(), orig->data.size()); - EXPECT_EQ(img->step, img->width * 3); - EXPECT_EQ(img->step * img->height, orig->data.size()); - EXPECT_GE(img->data.size(), 1); - bool data_equal{true}; - for (size_t i = 0; i < img->data.size(); i++) { - if (img->data[i] != orig->data[i]) { - std::cout << "mismatch at " << i << ", orig: " << static_cast(orig->data[i]) - << " now: " << static_cast(img->data[i]) << std::endl; - data_equal = false; - } - } - EXPECT_TRUE(data_equal); - frame_counter_++; - } - - std::string expected_frame_id_; - std::string expected_encoding_; - uint32_t expected_width_{0}; - uint32_t expected_height_{0}; - uint64_t image_sum_{0}; - int64_t ts_sum_{0}; - uint64_t frame_counter_{0}; - uint64_t packet_counter_{0}; - std::unordered_map time_to_image_; - std::string decoder_name_; - ffmpeg_encoder_decoder::Decoder decoder_; -}; - -void test_encoder_msg(int numFrames, const std::string & encoder, EncoderTester * tester) -{ - ffmpeg_encoder_decoder::Encoder enc; - enc.setEncoder(encoder); - enc.addAVOption("profile", "main"); - enc.addAVOption("preset", "slow"); - enc.setQMax(10); - enc.setBitRate(8242880); - enc.setGOPSize(2); - enc.setFrameRate(100, 1); - - if (!enc.initialize( - tester->getWidth(), tester->getHeight(), - std::bind(&EncoderTester::packetReady, tester, _1, _2, _3, _4, _5, _6, _7, _8, _9))) { - std::cerr << "failed to initialize encoder!" 
<< std::endl; - return; - } - Image image; - image.encoding = "bgr8"; - image.width = tester->getWidth(); - image.height = tester->getHeight(); - image.step = image.width * 3; // 3 bytes per pixel - image.header.frame_id = tester->getFrameId(); - image.is_bigendian = false; - - for (int64_t i = 0; i < numFrames; i++) { - image.header.stamp = rclcpp::Time(i + 1, RCL_SYSTEM_TIME); - image.data.resize(image.step * image.height, static_cast(i)); - enc.encodeImage(image); - } - enc.flush(); -} - -void test_encoder_decoder_msg( - int numFrames, const std::string & encoder, const std::string & decoder, - EncoderTester * enc_tester, DecoderTester * dec_tester) -{ - dec_tester->setDecoder(decoder); - ffmpeg_encoder_decoder::Encoder enc; - enc.setEncoder(encoder); - enc.addAVOption("x265-params", "lossless=1"); - enc.addAVOption("crf", "0"); // may not be needed for lossless - enc.setAVSourcePixelFormat("gray"); - enc.setCVBridgeTargetFormat("mono8"); - enc_tester->setCallback( - std::bind(&DecoderTester::packetReady, dec_tester, _1, _2, _3, _4, _5, _6, _7, _8, _9)); - - if (!enc.initialize( - enc_tester->getWidth(), enc_tester->getHeight(), - std::bind(&EncoderTester::packetReady, enc_tester, _1, _2, _3, _4, _5, _6, _7, _8, _9))) { - std::cerr << "failed to initialize encoder!" << std::endl; - return; - } - - for (int64_t i = 0; i < numFrames; i++) { - auto image = std::make_shared(); - image->encoding = "bgr8"; - image->width = enc_tester->getWidth(); - image->height = enc_tester->getHeight(); - image->step = image->width * 3; // 3 bytes per pixel - image->header.frame_id = enc_tester->getFrameId(); - image->is_bigendian = false; - - image->header.stamp = rclcpp::Time(i + 1, RCL_SYSTEM_TIME); - image->data.resize(image->step * image->height, static_cast(i)); - dec_tester->addImage(image); - enc.encodeImage(*image); - } - enc.flush(); - dec_tester->finalize(); -} - -TEST(ffmpeg_encoder_decoder, encoder_msg) -{ - const int n = 10; - EncoderTester tester("frame_id", "h264", 640, 480); - test_encoder_msg(n, "libx264", &tester); - // EXPECT_EQ(tester.getPacketSum(), 115852); varies between ffmpeg versions - EXPECT_EQ(tester.getTsSum(), (n * (n + 1)) / 2); - EXPECT_EQ(tester.getPtsSum(), (n * (n - 1)) / 2); - EXPECT_EQ(tester.getPacketCounter(), n); -} - -TEST(ffmpeg_encoder_decoder, encoder_decoder_msg) -{ - const int n = 10; - const int w = 640; - const int h = 480; - EncoderTester enc_tester("frame_id", "hevc", w, h); - DecoderTester dec_tester("frame_id", "hevc", w, h); - test_encoder_decoder_msg(n, "libx265", "hevc", &enc_tester, &dec_tester); - // EXPECT_EQ(enc_tester.getPacketSum(), 266674); differs with ffmpeg version - EXPECT_EQ(enc_tester.getTsSum(), (n * (n + 1)) / 2); - EXPECT_EQ(enc_tester.getPtsSum(), (n * (n - 1)) / 2); - EXPECT_EQ(enc_tester.getPacketCounter(), n); - EXPECT_EQ(dec_tester.getPacketCounter(), n); - EXPECT_EQ(dec_tester.getFrameCounter(), n); -} - -int main(int argc, char ** argv) -{ - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} From 3c314bd8e58aa4dbdd4fe5a4af256340d23cc522 Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Sat, 26 Jul 2025 04:07:58 -0400 Subject: [PATCH 08/12] fix uninitialized memory bug --- include/ffmpeg_encoder_decoder/decoder.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ffmpeg_encoder_decoder/decoder.hpp b/include/ffmpeg_encoder_decoder/decoder.hpp index a46da24..cfaa343 100644 --- a/include/ffmpeg_encoder_decoder/decoder.hpp +++ b/include/ffmpeg_encoder_decoder/decoder.hpp @@ -251,7 +251,7 @@ 
class Decoder AVFrame * cpuFrame_{NULL}; AVFrame * outputFrame_{NULL}; SwsContext * swsContext_{NULL}; - enum AVPixelFormat hwPixFormat_; + enum AVPixelFormat hwPixFormat_ { AV_PIX_FMT_NONE }; std::string outputMsgEncoding_; AVPacket packet_; AVBufferRef * hwDeviceContext_{NULL}; From 81cdac04d866e459bdcc7ce27b61752342436b51 Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Mon, 28 Jul 2025 15:13:31 -0400 Subject: [PATCH 09/12] new feature: can set AV options for decoder --- include/ffmpeg_encoder_decoder/decoder.hpp | 11 +++++++++++ src/decoder.cpp | 18 ++++++++++++++++++ src/encoder.cpp | 2 +- test/encoder_decoder_tester.cpp | 4 ++-- 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/include/ffmpeg_encoder_decoder/decoder.hpp b/include/ffmpeg_encoder_decoder/decoder.hpp index cfaa343..9305280 100644 --- a/include/ffmpeg_encoder_decoder/decoder.hpp +++ b/include/ffmpeg_encoder_decoder/decoder.hpp @@ -217,6 +217,15 @@ class Decoder * \brief resets performance debugging timers. Poorly tested, may be broken. */ void resetTimers(); + /** + * \brief adds AVOption setting to list of options to be applied before opening the encoder + * \param key name of AVOption to set, e.g. "preset" + * \param value value of AVOption e.g. "slow" + */ + void addAVOption(const std::string & key, const std::string & value) + { + avOptions_.push_back({key, value}); + } // ------------------- deprecated functions --------------- /** @@ -234,11 +243,13 @@ class Decoder const std::vector & valid_decoders); int receiveFrame(); int convertFrameToMessage(const AVFrame * frame, const ImagePtr & image); + void setAVOption(const std::string & field, const std::string & value); // --------------- variables rclcpp::Logger logger_; Callback callback_; PTSMap ptsToStamp_; + std::vector> avOptions_; // --- performance analysis bool measurePerformance_{false}; TDiff tdiffTotal_; diff --git a/src/decoder.cpp b/src/decoder.cpp index 793bc16..3a1b477 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -308,6 +308,12 @@ bool Decoder::initSingleDecoder(const std::string & decoder) } } codecContext_->pkt_timebase = timeBase_; + std::stringstream ss; + for (const auto & kv : avOptions_) { + setAVOption(kv.first, kv.second); + ss << " " << kv.first << "=" << kv.second; + } + RCLCPP_INFO(logger_, "using decoder %10s with options: %s", decoder.c_str(), ss.str().c_str()); if (avcodec_open2(codecContext_, codec, NULL) < 0) { av_free(codecContext_); @@ -486,6 +492,18 @@ void Decoder::printTimers(const std::string & prefix) const RCLCPP_INFO_STREAM(logger_, prefix << " total decode: " << tdiffTotal_); } +void Decoder::setAVOption(const std::string & field, const std::string & value) +{ + if (!value.empty() && codecContext_ && codecContext_->priv_data) { + const int err = + av_opt_set(codecContext_->priv_data, field.c_str(), value.c_str(), AV_OPT_SEARCH_CHILDREN); + if (err != 0) { + RCLCPP_ERROR_STREAM( + logger_, "cannot set option " << field << " to value " << value << ": " << utils::err(err)); + } + } +} + const std::unordered_map & Decoder::getDefaultEncoderToDecoderMap() { RCLCPP_INFO_STREAM( diff --git a/src/encoder.cpp b/src/encoder.cpp index 85763a1..dc8554c 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -326,7 +326,7 @@ void Encoder::doOpenCodec(int width, int height, const std::string &) void Encoder::setAVOption(const std::string & field, const std::string & value) { - if (!value.empty()) { + if (!value.empty() && codecContext_ && codecContext_->priv_data) { const int err = 
av_opt_set(codecContext_->priv_data, field.c_str(), value.c_str(), AV_OPT_SEARCH_CHILDREN); if (err != 0) { diff --git a/test/encoder_decoder_tester.cpp b/test/encoder_decoder_tester.cpp index c586683..e6964a1 100644 --- a/test/encoder_decoder_tester.cpp +++ b/test/encoder_decoder_tester.cpp @@ -145,8 +145,8 @@ void EncoderDecoderTester::imageCallback( EXPECT_EQ(img->step, orig->step); EXPECT_EQ(img->step * img->height, orig->data.size()); EXPECT_GE(img->data.size(), 1); - const int num_channels = sensor_msgs::image_encodings::bitDepth(img->encoding) / 8; - const int num_orig_channels = sensor_msgs::image_encodings::bitDepth(orig->encoding) / 8; + const int num_channels = sensor_msgs::image_encodings::numChannels(img->encoding); + const int num_orig_channels = sensor_msgs::image_encodings::numChannels(orig->encoding); if (num_channels != num_orig_channels) { std::cerr << "num channel mismatch! orig: " << num_orig_channels << " vs now: " << num_channels << std::endl; From c18a15ccc32d6d02d20979b9f837abd41eea3afb Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Thu, 31 Jul 2025 04:45:17 -0400 Subject: [PATCH 10/12] use comma-separated decoder list now, use semi-colon to separate encodings --- include/ffmpeg_encoder_decoder/decoder.hpp | 32 +---- include/ffmpeg_encoder_decoder/encoder.hpp | 4 +- include/ffmpeg_encoder_decoder/utils.hpp | 17 +++ src/decoder.cpp | 132 +++++++-------------- src/encoder.cpp | 11 +- src/utils.cpp | 29 +++++ test/encoder_decoder_test.cpp | 6 +- 7 files changed, 108 insertions(+), 123 deletions(-) diff --git a/include/ffmpeg_encoder_decoder/decoder.hpp b/include/ffmpeg_encoder_decoder/decoder.hpp index 9305280..4c84781 100644 --- a/include/ffmpeg_encoder_decoder/decoder.hpp +++ b/include/ffmpeg_encoder_decoder/decoder.hpp @@ -105,30 +105,13 @@ class Decoder * \brief Initializes the decoder for a given codec and libav decoder. * * Initializes the decoder, with multiple decoders to pick from. - * If the name of the libav decoder string is empty, a suitable libav decoder - * will be picked, or the initialization will fail if none is available. * \param codec the codec (encoding) from the first packet. Can never change! * \param callback the function to call when frame has been decoded. - * \param decoder the name of the libav decoder to use. If empty string, - * the decoder will try to find a suitable one based on the encoding. + * \param decoder the name of the libav decoder to use. * \return true if initialized successfully. */ bool initialize(const std::string & codec, Callback callback, const std::string & decoder); - /** - * \ brief initializes the decoder, trying different libavdecoders in order. - * - * Initialize decoder with multiple libav decoders to pick from. - * If decoders.empty() a default decoder will be chosen (if available). - * \param codec the codec (encoding) from the first packet. Can never change! - * \param callback the function to call when frame has been decoded. - * \param decoders names of the libav decoders to try sequentially. If empty() - * the decoder will try to find a suitable one based on the codec. - * \return true if successful - */ - bool initialize( - const std::string & codec, Callback callback, const std::vector & decoders); - /** * \brief Sets the ROS output message encoding format. * @@ -197,9 +180,9 @@ class Decoder * Finds the name of all hardware and software decoders (combined) * that match a certain codec (or encoder). * \param codec name of the codec, i.e. 
h264, hevc etc - * \return vector with names of matching libav decoders + * \return string with comma-separated list of libav decoders */ - static std::vector findDecoders(const std::string & codec); + static std::string findDecoders(const std::string & codec); /** * \brief Enables or disables performance measurements. Poorly tested, may be broken. @@ -236,15 +219,12 @@ class Decoder getDefaultEncoderToDecoderMap(); private: - bool initSingleDecoder(const std::string & decoder); - bool initDecoder(const std::vector & decoders); - std::vector filterDecoders( - const std::string & encoding, const std::vector & decoders, - const std::vector & valid_decoders); + bool doInitDecoder(const std::string & encoding, const std::string & decoder); + bool initDecoder(const std::string & encoding, const std::string & decoders); int receiveFrame(); int convertFrameToMessage(const AVFrame * frame, const ImagePtr & image); void setAVOption(const std::string & field, const std::string & value); - + void setEncoding(const std::string & encoding); // --------------- variables rclcpp::Logger logger_; Callback callback_; diff --git a/include/ffmpeg_encoder_decoder/encoder.hpp b/include/ffmpeg_encoder_decoder/encoder.hpp index ff2acac..f9e4d3d 100644 --- a/include/ffmpeg_encoder_decoder/encoder.hpp +++ b/include/ffmpeg_encoder_decoder/encoder.hpp @@ -269,7 +269,8 @@ class Encoder * \param callback the function to call for handling encoded packets * \param encoding the ros encoding string, e.g. bayer_rggb8, rgb8 ... */ - bool initialize(int width, int height, Callback callback, const std::string & encoding); + bool initialize( + int width, int height, Callback callback, const std::string & encoding = std::string()); /** * \brief sets ROS logger to use for info/error messages * \param logger the logger to use for messages @@ -383,6 +384,7 @@ class Encoder AVRational timeBase_{1, 100}; AVRational frameRate_{100, 1}; bool usesHardwareFrames_{false}; + std::string avSourcePixelFormat_; // ------ libav state AVCodecContext * codecContext_{nullptr}; AVBufferRef * hwDeviceContext_{nullptr}; diff --git a/include/ffmpeg_encoder_decoder/utils.hpp b/include/ffmpeg_encoder_decoder/utils.hpp index da1cf31..9dce509 100644 --- a/include/ffmpeg_encoder_decoder/utils.hpp +++ b/include/ffmpeg_encoder_decoder/utils.hpp @@ -112,6 +112,23 @@ void find_decoders( const std::string & codec, std::vector * hw_decoders, std::vector * sw_decoders); +/** + * \brief finds the names of all available decoders + * for a given codec (or encoder) + * \param codec the codec / encoding to find decoders for + * \return string with comma separated list of libav decoder names + */ + +std::string find_decoders(const std::string & codec); +/** + * \brief filters a string with comma-separated decoders and + * + * \param codec the codec / encoding to filter for + * \param decoders string with comma-separated list of decoder names + * \return string with comma separated list of libav decoder names + */ +std::string filter_decoders(const std::string & codec, const std::string & decoders); + /** * \brief gets list of names of all libav supported device types * diff --git a/src/decoder.cpp b/src/decoder.cpp index 3a1b477..0443b69 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -50,6 +50,7 @@ void Decoder::reset() outputFrame_ = NULL; hwPixFormat_ = AV_PIX_FMT_NONE; outputMsgEncoding_ = ""; + packetEncoding_ = ""; } void Decoder::setOutputMessageEncoding(const std::string & output_encoding) @@ -58,61 +59,29 @@ void Decoder::setOutputMessageEncoding(const 
std::string & output_encoding) outputMsgEncoding_ = output_encoding; } -bool Decoder::initialize( - const std::string & encoding, Callback callback, const std::string & decoder) +static std::vector splitEncoding(const std::string & encoding) { - return (initialize( - encoding, callback, - decoder.empty() ? std::vector() : std::vector{decoder})); + return (utils::split_by_char(encoding, ';')); } -bool Decoder::initialize( - const std::string & encoding, Callback callback, const std::vector & decoders) +void Decoder::setEncoding(const std::string & encoding) { - callback_ = callback; packetEncoding_ = encoding; - const auto split = utils::split_by_char(encoding, '/'); + const auto split = splitEncoding(encoding); if (outputMsgEncoding_.empty()) { // assume orig was bgr8 - outputMsgEncoding_ = split.size() > 1 ? split[1] : "bgr8"; + outputMsgEncoding_ = split.size() == 4 ? split[3] : "bgr8"; RCLCPP_INFO_STREAM( logger_, - "output image encoding: " << outputMsgEncoding_ << ((split.size() > 1) ? "" : " (default)")); - } - const auto all_decoders = findDecoders(split[0]); - if (all_decoders.empty()) { - RCLCPP_ERROR_STREAM(logger_, "no decoders discovered for code:c " << split[0]); - throw(std::runtime_error("no decoders discovered for codec: " + split[0])); - } - if (decoders.empty()) { // try all libav-discovered decoders - std::string decoders_str; - for (const auto & decoder : all_decoders) { - decoders_str += " " + decoder; - } - RCLCPP_INFO_STREAM(logger_, "trying discovered decoders in order:" << decoders_str); - return (initDecoder(all_decoders)); - } - const auto good_decoders = filterDecoders(encoding, decoders, all_decoders); - if (good_decoders.empty()) { - return (false); + "output image encoding: " << outputMsgEncoding_ << ((split.size() == 4) ? 
"" : " (default)")); } - return (initDecoder(good_decoders)); } -std::vector Decoder::filterDecoders( - const std::string & encoding, const std::vector & decoders, - const std::vector & valid_decoders) +bool Decoder::initialize( + const std::string & encoding, Callback callback, const std::string & decoder) { - std::vector good_decoders; - for (const auto & dec : decoders) { // filter for decoders matching codec - if (std::find(valid_decoders.begin(), valid_decoders.end(), dec) != valid_decoders.end()) { - good_decoders.push_back(dec); - } else { - RCLCPP_WARN_STREAM( - logger_, "configured decoder: " << dec << " cannot handle encoding: " << encoding); - } - } - return (good_decoders); + callback_ = callback; + return (initDecoder(encoding, decoder)); } static AVBufferRef * hw_decoder_init( @@ -232,46 +201,32 @@ enum AVPixelFormat get_format(struct AVCodecContext * avctx, const enum AVPixelF return AV_PIX_FMT_NONE; } -bool Decoder::initDecoder(const std::vector & decoders) +bool Decoder::initDecoder(const std::string & encoding, const std::string & decoder) { - if (decoders.empty()) { - RCLCPP_ERROR_STREAM(logger_, "no decoders configured for this encoding!"); - throw(std::runtime_error("no decoders configured for this encoding!")); + const AVCodec * codec = avcodec_find_decoder_by_name(decoder.c_str()); + if (!codec) { + RCLCPP_WARN_STREAM(logger_, "decoder " << decoder << " cannot decode " << encoding); + return (false); } - for (const auto & decoder : decoders) { - const AVCodec * codec = avcodec_find_decoder_by_name(decoder.c_str()); - if (codec) { - // use the decoder if it either is software, or has working - // hardware support - if (codec->capabilities & AV_CODEC_CAP_HARDWARE) { - const AVCodecHWConfig * hwConfig = avcodec_get_hw_config(codec, 0); - if (hwConfig) { - if (initSingleDecoder(decoder)) { - return (true); - } - } else { - RCLCPP_INFO_STREAM(logger_, "ignoring decoder with no hardware config: " << decoder); - } - } else { - if (initSingleDecoder(decoder)) { - return (true); - } - } - } else { - RCLCPP_WARN_STREAM(logger_, "unknown decoder: " << decoder); + // use the decoder if it either is software, or has working + // hardware support + const AVCodecHWConfig * hwConfig = nullptr; + if (codec->capabilities & AV_CODEC_CAP_HARDWARE) { + hwConfig = avcodec_get_hw_config(codec, 0); + if (!hwConfig) { + RCLCPP_INFO_STREAM(logger_, "ignoring decoder with no hardware config: " << decoder); + return (false); } } - if (decoders.size() > 1) { - RCLCPP_ERROR_STREAM(logger_, "none of these requested decoders works: "); - for (const auto & decoder : decoders) { - RCLCPP_ERROR_STREAM(logger_, " " << decoder); - } + if (!doInitDecoder(encoding, decoder)) { + return (false); } - throw(std::runtime_error("cannot find matching decoder!")); + return (true); } -bool Decoder::initSingleDecoder(const std::string & decoder) +bool Decoder::doInitDecoder(const std::string & encoding, const std::string & decoder) { + setEncoding(encoding); try { const AVCodec * codec = avcodec_find_decoder_by_name(decoder.c_str()); if (!codec) { @@ -373,7 +328,6 @@ int Decoder::receiveFrame() } } AVFrame * frame = isAcc ? 
cpuFrame_ : swFrame_; - if (frame->width == ctx->width && frame->height == ctx->height) { // prepare the decoded message ImagePtr image(new Image()); @@ -504,14 +458,6 @@ void Decoder::setAVOption(const std::string & field, const std::string & value) } } -const std::unordered_map & Decoder::getDefaultEncoderToDecoderMap() -{ - RCLCPP_INFO_STREAM( - rclcpp::get_logger("ffmpeg_decoder"), - "default map is deprecated, use findDecoders() or pass empty string instead!"); - throw(std::runtime_error("default map is deprecated!")); -} - void Decoder::findDecoders( const std::string & codec, std::vector * hw_decoders, std::vector * sw_decoders) @@ -519,11 +465,21 @@ void Decoder::findDecoders( utils::find_decoders(codec, hw_decoders, sw_decoders); } -std::vector Decoder::findDecoders(const std::string & codec) +std::string Decoder::findDecoders(const std::string & codec) { - std::vector sw_decoders, all_decoders; - utils::find_decoders(codec, &all_decoders, &sw_decoders); - all_decoders.insert(all_decoders.end(), sw_decoders.begin(), sw_decoders.end()); - return (all_decoders); + return (utils::find_decoders(codec)); } + +// -------------- deprecated, DO NOT USE ------------------ +const std::unordered_map & Decoder::getDefaultEncoderToDecoderMap() +{ + static const std::unordered_map defaultMap{ + {{"h264_nvenc", "h264"}, + {"libx264", "h264"}, + {"hevc_nvenc", "hevc_cuvid"}, + {"h264_nvmpi", "h264"}, + {"h264_vaapi", "h264"}}}; + return (defaultMap); +} + } // namespace ffmpeg_encoder_decoder diff --git a/src/encoder.cpp b/src/encoder.cpp index dc8554c..80f1de4 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -144,9 +144,6 @@ void Encoder::openHardwareDevice( logger_, "user overriding software pix fmt " << utils::pix(frames_ctx->sw_format)); RCLCPP_INFO_STREAM(logger_, "with " << utils::pix(pixFormat_)); frames_ctx->sw_format = pixFormat_; // override default at your own risk! - } else { - RCLCPP_INFO_STREAM( - logger_, "using software pixel format: " << utils::pix(frames_ctx->sw_format)); } if (frames_ctx->sw_format == AV_PIX_FMT_NONE) { av_buffer_unref(&hwframe_ctx_ref); @@ -198,10 +195,9 @@ enum AVPixelFormat Encoder::findMatchingSourceFormat( return (utils::ros_to_av_pix_format(rosSrcFormat)); } -void Encoder::doOpenCodec(int width, int height, const std::string &) +void Encoder::doOpenCodec(int width, int height, const std::string & origEncoding) { int err = 0; - encoding_ = codec_ + "/" + cvBridgeTargetFormat_; codecContext_ = nullptr; if (encoder_.empty()) { @@ -255,6 +251,9 @@ void Encoder::doOpenCodec(int width, int height, const std::string &) : utils::get_preferred_pixel_format(usesHardwareFrames_, pixFmts); codecContext_->sw_pix_fmt = codecContext_->pix_fmt; } + avSourcePixelFormat_ = utils::pix(codecContext_->sw_pix_fmt); + RCLCPP_INFO_STREAM(logger_, "using av_source_pixel_format: " << avSourcePixelFormat_); + std::stringstream ss; for (const auto & kv : avOptions_) { setAVOption(kv.first, kv.second); @@ -322,6 +321,8 @@ void Encoder::doOpenCodec(int width, int height, const std::string &) throw(std::runtime_error("cannot allocate sws context")); } } + encoding_ = codec_ + ";" + avSourcePixelFormat_ + ";" + cvBridgeTargetFormat_ + ";" + + (origEncoding.empty() ? 
cvBridgeTargetFormat_ : origEncoding); } void Encoder::setAVOption(const std::string & field, const std::string & value) diff --git a/src/utils.cpp b/src/utils.cpp index 12ede02..4c5515d 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -294,6 +294,35 @@ void find_decoders( find_decoders(real_encoding, sw_decoders, false); } +std::string find_decoders(const std::string & codec) +{ + std::string decoders; + std::vector hw_dec; + std::vector sw_dec; + find_decoders(codec, &hw_dec, &sw_dec); + for (const auto & s : hw_dec) { + decoders += (decoders.empty() ? "" : ",") + s; + } + for (const auto & s : sw_dec) { + decoders += (decoders.empty() ? "" : ",") + s; + } + return decoders; +} + +std::string filter_decoders(const std::string & codec, const std::string & decoders) +{ + std::string filtered; + const auto valid_decoders = split_by_char(find_decoders(codec), ','); + for (const auto & dec : split_by_char(decoders, ',')) { + for (const auto & valid : valid_decoders) { + if (dec == valid) { + filtered += (filtered.empty() ? "" : ",") + dec; + } + } + } + return (filtered); +} + std::string find_codec(const std::string & encoder) { const AVCodec * c = find_by_name(encoder); diff --git a/test/encoder_decoder_test.cpp b/test/encoder_decoder_test.cpp index 654850d..bf525b9 100644 --- a/test/encoder_decoder_test.cpp +++ b/test/encoder_decoder_test.cpp @@ -28,9 +28,9 @@ TEST(ffmpeg_encoder_decoder, encoder_decoder_bgr8) tester.setCVBridgeTargetFormat(orig_enc); // encoder only supports bgr0, bgr24, rgb24 // tester.setAVSourcePixelFormat("bgr24"); // automatically picked - tester.setExpectedPacketEncoding("h264/" + orig_enc); tester.addAVOption("crf", "0"); // needed for lossless // decoding options + tester.setExpectedPacketEncoding("h264;bgr24;" + orig_enc + ";" + orig_enc); tester.setFinalROSEncoding(orig_enc); tester.setDecoder("h264"); // codec == encoder in this case tester.setDecoderExpectedAVPixFmt("gbrp"); // picked by decoder @@ -48,7 +48,7 @@ TEST(ffmpeg_encoder_decoder, encoder_decoder_mono) tester.setCVBridgeTargetFormat(orig_enc); tester.setAVSourcePixelFormat("gray"); tester.addAVOption("x265-params", "lossless=1"); - tester.setExpectedPacketEncoding("hevc/" + orig_enc); + tester.setExpectedPacketEncoding("hevc;gray;" + orig_enc + ";" + orig_enc); tester.addAVOption("crf", "0"); // may not be needed for lossless // decoding options tester.setFinalROSEncoding(orig_enc); @@ -73,7 +73,7 @@ TEST(ffmpeg_encoder_decoder, encoder_decoder_bayer) // color conversion, and do this losslessly. 
// tester.setAVSourcePixelFormat("yuv420p"); // not needed tester.addAVOption("x265-params", "lossless=1"); - tester.setExpectedPacketEncoding("hevc/" + orig_enc); + tester.setExpectedPacketEncoding("hevc;yuv420p;" + orig_enc + ";" + orig_enc); // --- decoding options tester.setFinalROSEncoding(orig_enc); tester.setDecoder("hevc"); From 61c5e655d13c8fdd6647f69c756b5dd24dc80253 Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Sat, 2 Aug 2025 07:59:54 -0400 Subject: [PATCH 11/12] improved documentation --- README.md | 104 +- doc/encoder_decoder.dia | 1256 ++++++++++++++++++++ doc/encoder_decoder.svg | 155 +++ include/ffmpeg_encoder_decoder/decoder.hpp | 3 +- 4 files changed, 1471 insertions(+), 47 deletions(-) create mode 100644 doc/encoder_decoder.dia create mode 100644 doc/encoder_decoder.svg diff --git a/README.md b/README.md index 4046129..fa00baf 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ This ROS2 package supports encoding/decoding with the FFMpeg library, for example encoding h264 and h265 or HEVC, using Nvidia or other hardware acceleration when available. This package is meant to be used by image transport plugins like -the [ffmpeg image transport](https://github.com/ros-misc-utilities/ffmpeg_image_transport/). +the [ffmpeg image transport](https://github.com/ros-misc-utilities/ffmpeg_image_transport/) +and the [foxglove compressed video transport](https://github.com/ros-misc-utilities/foxglove_compressed_video_transport/). ## Supported systems @@ -12,6 +13,7 @@ Continuous integration is tested under Ubuntu with the following ROS2 distros: [![Build Status](https://build.ros2.org/buildStatus/icon?job=Hdev__ffmpeg_encoder_decoder__ubuntu_jammy_amd64&subject=Humble)](https://build.ros2.org/job/Hdev__ffmpeg_encoder_decoder__ubuntu_jammy_amd64/) [![Build Status](https://build.ros2.org/buildStatus/icon?job=Jdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64&subject=Jazzy)](https://build.ros2.org/job/Jdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64/) + [![Build Status](https://build.ros2.org/buildStatus/icon?job=Kdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64&subject=Kilted)](https://build.ros2.org/job/Kdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64/) [![Build Status](https://build.ros2.org/buildStatus/icon?job=Rdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64&subject=Rolling)](https://build.ros2.org/job/Rdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64/) @@ -34,10 +36,59 @@ and follow the [instructions here](https://github.com/ros-misc-utilities/.github Make sure to source your workspace's ``install/setup.bash`` afterwards. -## ROS Parameters +## API overview -This package does not expose ROS parameters. It is the upper layer's responsibility to e.g. manage the mapping between encoder and decoder, i.e. to tell the decoder class which libav decoder should be used for the decoding, or to set the encoding parameters. +### Preliminaries +When using libav it is important to understand the difference between the encoder and the codec. +The *codec* is the standardized format in which images are encoded, for instance ``h264`` or ``hevc``. +The *encoder* is a libav software module that can encode images for a given codec. For instance ``libx264``, ``libx264rgb``, + `h264_nvenc``, and ``h264_vaapi`` are all *encoders* that encode for the *codec* h264. +Some of the encoders are hardware accelerated, some can handle more image formats than others, but in the end they all encode video for a specific codec. 
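As a quick, stand-alone sanity check (plain libavcodec, not part of this package; assumes the ffmpeg development headers are installed) you can resolve this encoder-to-codec mapping programmatically:

```
extern "C" {
#include <libavcodec/avcodec.h>
}
#include <cstdio>

int main()
{
  // different encoders, same codec: both should report "h264" if they are available
  const char * names[] = {"libx264", "h264_nvenc"};
  for (const char * name : names) {
    const AVCodec * enc = avcodec_find_encoder_by_name(name);
    if (enc) {
      std::printf("%s encodes for codec %s\n", name, avcodec_get_name(enc->id));
    } else {
      std::printf("%s is not available in this ffmpeg build\n", name);
    }
  }
  return 0;
}
```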
+For the many AV options available for various libav encoders, and for ``qmax``, ``bit_rate`` and similar settings please refer to the ffmpeg documentation. + +### The inner workings +![overview_diagram](./doc/encoder_decoder.svg) + +The diagram above shows the stations that a ROS Image message passes as it traverses the encoder and decoder. +1) The ROS image (with [ROS sensor\_msgs/Image encoding](https://github.com/ros2/common_interfaces/blob/a2ef867438e6d4eed074d6e3668ae45187e7de86/sensor_msgs/include/sensor_msgs/image_encodings.hpp)) is first converted with the ROS [cv\_bridge](https://github.com/ros-perception/vision_opencv) into the ``cv_bridge_target_format``. + This conversion is necessary because some ROS encodings (like bayer images) are not supported by libswscale. + The ``cv_bridge_target_format`` can be set via ``setCVBridgeTargetFormat(const std::string & fmt)``. + If this format is not set explicitly the image will be converted to the default format of ``bgr8``. + This may not be what you want for e.g. ``mono8`` (gray) or Bayer images. + Ideally the``cv_bridge_target_format`` can be directly used by the libav decoder so the next step becomes a no-op. + But at the very least ``cv_bridge_target_format`` must be an acceptable libswscale input format (with the exception of + special hacks for encoding single-channel images, see below). +2) The image is then converted to ``av_source_pixel_format`` using libswscale. + The ``av_source_pixel_format`` can be set with ``setAVSourcePixelFormat()``, defaulting to something that is acceptable to the libav encoder. + You can use ffmpeg (``ffmpeg -h encoder=libx264 | grep formats``) to list all formats that an encoder supports. + Note that the ffmpeg/libav format string notation is different from the ROS encoding strings, and the ``av_source_pixel_format`` is specified using the libav convention, whereas the ``cv_bridge_target_format`` uses ROS convention! + (If you choose to bypass the cv\_bridge conversion from step 1 by feeding the images to the encoder directly via the ``encodeImage(const cv::Mat & img ...)`` method, you must still set the ``cv_bridge_target_format`` such that the encoder knows what format the ``img`` argument has.) + When aiming for lossless compression, beware of any ``av_source_pixel_format`` that reduces the color resolution, such as ``yuv420p``, ``nv12`` etc. + For Bayer images, use the special hack for single-channel images. + +3) The libav encoder encodes the packet with its supported codec, e.g. the ``libx264`` will produce ``h264`` packets. + The ``encoding`` field of the FFMPEGPacket message will document all image format conversions and the codec, in reverse order, separated by semicolon. + This way the decoder can attempt to reproduce the original ``ros_encoding``. + +4) The libav decoder decodes the packet into the original ``av_source_pixel_format``. + +5) Finally the image is converted to ``output_message_format`` using libswscale. + This format can be set (in ROS encoding syntax!) with ``setOutputMessageEncoding()``. + The format must be supported by both ROS and libswscale (except when using the special hack for single-channel images). + +Note that only very few combinations of libav encoders, ``cv_bridge_target_format`` and ``av_source_pixel_format`` have been tested. Please provide feedback if you observe crashes or find obvious bugs. PRs are always appreciated! + +### The special single-channel hack + +Many libav encoders do not support single-channel formats (like mono8 or bayer). 
+For this reason a special hack is implemented in the encoder that adds an empty (zero-value) color channel to the single-channel image. +Later, the decoder removes it again. +To utilitze this hack, specify a ``cv_bridge_target_format`` of e.g. ``bayer_rggb8``. Without the special hack, this would trigger an error because Bayer formats are not acceptable to libswscale. +Instead, the image is converted to ``yuv420p`` or ``nv12`` by adding an empty color channel. +These formats are acceptable to most encoders. +The decoder in turn recognizes that the ``cv_bridge_target_format`` is a single-channel format, but ``yuv420p``/``nv12`` are not, and therefore drops the color channel. +This hack greatly improves the efficiency for lossless encoding of Bayer images because it avoids conversion to full RGB and back. ## API usage @@ -53,42 +104,15 @@ Using the encoder involves the following steps: - flushing the encoder (may result in additional callbacks with encoded packets) - destroying the ``Encoder`` object. -The ``Encoder`` class description has a short example code snippet. - -When using libav it is important to understand the difference between the encoder and the codec. The *codec* is the standardized format in which images are encoded, for instance ``h264`` or ``hevc``. The *encoder* is a software module that can encode images for a given codec. For instance ``libx264``, ``libx264rgb``, ``h264_nvenc``, and ``h264_vaapi`` are all *encoders* that encode for the *codec* h264. -Some of the encoders are hardware accelerated, some can handle more image formats than others, but in the end they all encode video for a specific codec. You set the libav encoder with the ``setEncoder()`` method. - -For the many AV options available for various libav encoders, and for ``qmax``, ``bit_rate`` and similar settings please refer to the ffmpeg documentation. - -The topic of "pixel formats" (also called "encodings") can be very confusing and frustrating, so here a few lines about it. -- Images in ROS arrive as messages that are encoded in one of the formats allowed by the [sensor\_msgs/Image encodings](https://github.com/ros2/common_interfaces/blob/a2ef867438e6d4eed074d6e3668ae45187e7de86/sensor_msgs/include/sensor_msgs/image_encodings.hpp). If you pass that message in via the ``encodeImage(const Image & msg)`` message, it will first will be converted to an opencv matrix using the [cv_bridge](https://github.com/ros-perception/vision_opencv). -If you don't set the ``cv_bridge_target_format`` via ``setCVBridgeTargetFormat(const std::string & fmt)``, the image will be converted to the default format of ``bgr8``. Depending on how performance sensitive you are (or if you have a ``mono8`` (gray) image!), this may not be what you want. Ideally, you should pick a ``cv_bridge_target_format`` that can be directly used by the libav decoder that you have chosen. You can use ffmpeg to list the formats that the libav encoder directly supports: - ``` - ffmpeg -h encoder=libx264 | grep formats - ``` - Note that the ffmpeg/libav format strings often diverge from the ROS encoding strings, so some guesswork and experimentation may be necessary to pick the right ``cv_bridge_target_format``. - If you choose to bypass the cv\_bridge conversion by feeding the images to the encoder directly via the ``encodeImage(const cv::Mat & img ...)`` method, you must still set the ``cv_bridge_target_format`` such that encoder knows what format the ``img`` argument has. 
-- Once the image is available in ``cv_bridge_target_format`` the encoder may perform another conversion to an image format that the libav encoder supports, the ``av_source_pixel_format``. Again, ``ffmpeg -h encoder=`` shows the supported formats. If you don't set the ``av_source_pixel_format`` with ``setAVSourcePixelFormat()``, the encoder will try to pick one that is supported by the libav encoder. That often works but may not be optimal. -- Finally, the libav encoder produces a packet in its output codec, e.g. ``h264``, ``hevc`` etc. This encoding format is provided as the ``codec`` parameter when the encoder calls back with the encoded packet. Later, the codec needs to be provided to the decoder so it can pick a suitable libav decoder. - -To summarize, the conversion goes as follows: -``` - -> cv_bridge_target_format -> av_source_pixel_format --(libav encoder)--> codec -``` -By properly choosing ``cv_bridge_target_format`` and ``av_source_pixel_format`` two of those conversions -may become no-ops, but to what extend the cv\_bridge and libav actually recognize and optimize for this has not been looked into yet. - -Note that only very few combinations of libav encoders, ``cv_bridge_target_format`` and ``av_source_pixel_format`` have been tested. Please provide feedback if you observe crashes or find obvious bugs. PRs are always appreciated! +The ``Encoder`` class API description has a short example code snippet. ### Decoder Using the decoder involves the following steps: - instantiating the ``Decoder`` object. - if so desired, setting the ROS output (image encoding) format. -- initializing the decoder object. For this you need to know the encoding (codec, e.g. "h264"). - During initialization you also have to present an ordered list of libav decoders that - should be used. If an empty list is provided, the decoder will attempt to find a suitable - libav decoder based on the encoding, with a preference for hardware accelerated decoding. +- initializing the decoder object. For this you need to know the encoding (codec, e.g. "h264"), + and you have to specify the libav decoder name (e.g. "h264_cuvid"). - feeding encoded packets to the decoder (and handling the callbacks when decoded images become available) - flushing the decoder (may result in additional callbacks with decoded images) @@ -126,20 +150,8 @@ depth=1 &&cd jetson-ffmpeg && ./ffpatch.sh ../ffmpeg && cd ../ffmpeg && ./config Then follow the section above on how to actually use that custom ffmpeg library. As always first test on the -CLI that the newly compiled ``ffmpeg`` command now supports -``h264_nvmpi``. The transport can then be configured to use -nvmpi like so: +CLI that the newly compiled ``ffmpeg`` command now supports ``h264_nvmpi``. -``` - parameters=[{'ffmpeg_image_transport.encoding': 'h264_nvmpi', - 'ffmpeg_image_transport.profile': 'main', - 'ffmpeg_image_transport.preset': 'll', - 'ffmpeg_image_transport.gop_size': 15}] -``` -Sometimes the ffmpeg parameters show up under different names. If the above -settings don't work, try the command ``ros2 param dump `` -*after* subscribing to the ffmpeg image topic with e.g. ``ros2 topic hz``. -From the output you can see what the correct parameter names are. ## License This software is issued under the Apache License Version 2.0. 
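For the decoder workflow described above, here is a minimal sketch of how one might pick a libav decoder name with the ``Decoder::findDecoders()`` helper from this package (error handling omitted; which decoders are listed, and in what order, depends on your ffmpeg build):

```
#include <ffmpeg_encoder_decoder/decoder.hpp>
#include <string>

// Return the first decoder that findDecoders() suggests for a codec
// (hardware decoders are listed before software ones), or "" if none is found.
std::string pickDecoder(const std::string & codec)
{
  const std::string all = ffmpeg_encoder_decoder::Decoder::findDecoders(codec);
  return (all.substr(0, all.find(',')));  // first entry of the comma-separated list
}
```
The returned name is then passed as the ``decoder`` argument of ``initialize()``.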
diff --git a/doc/encoder_decoder.dia b/doc/encoder_decoder.dia
new file mode 100644
index 0000000..ef8d7b4
--- /dev/null
+++ b/doc/encoder_decoder.dia
@@ -0,0 +1,1256 @@
[Dia source of the encoder/decoder pipeline diagram. Node labels: ros_encoding, cv_bridge, cv_bridge_target_format, libswscale, av_source_pixel_format, libav encoder, FFMPEGPacket (encoding: codec;av_source_pixel_format;cv_bridge_target_format;ros_encoding), libav decoder, output_message_format, Image, Encoder, Decoder]
diff --git a/doc/encoder_decoder.svg b/doc/encoder_decoder.svg
new file mode 100644
index 0000000..f96a421
--- /dev/null
+++ b/doc/encoder_decoder.svg
@@ -0,0 +1,155 @@
[SVG rendering of the same encoder/decoder pipeline diagram]
+ + + + + + cv_bridge + + + + + + + + + + + + + + + + + + + + + + Image + + + + Encoder + + + Decoder + + + codec; + av_source_pixel_format; + cv_bridge_target_format; + ros_encoding + + + + + + + diff --git a/include/ffmpeg_encoder_decoder/decoder.hpp b/include/ffmpeg_encoder_decoder/decoder.hpp index 4c84781..054844e 100644 --- a/include/ffmpeg_encoder_decoder/decoder.hpp +++ b/include/ffmpeg_encoder_decoder/decoder.hpp @@ -55,8 +55,9 @@ ffmpeg_image_transport_msgs::msg::FFMPEGPacket msg; msg.header.frame_id = "frame_id"; msg.width = 640; msg.height = 480; +msg.step = 640 * 3; msg.encoding = "hevc"; -msg.data.resize(10000, 0); // Obviously this is not a valid packet!!! +msg.data.resize(msg.height * msg.step, 0); // not valid data! if (!decoder.isInitialized()) { decoder.initialize(msg.encoding, imageCallback, "hevc_cuvid"); From a4262123905e92efe6cfb40b07e5e7daabec7d80 Mon Sep 17 00:00:00 2001 From: Bernd Pfrommer Date: Mon, 4 Aug 2025 11:10:16 -0400 Subject: [PATCH 12/12] use mutex to make thread safe --- include/ffmpeg_encoder_decoder/decoder.hpp | 73 ++++++++++++++-------- src/decoder.cpp | 23 +++++-- 2 files changed, 65 insertions(+), 31 deletions(-) diff --git a/include/ffmpeg_encoder_decoder/decoder.hpp b/include/ffmpeg_encoder_decoder/decoder.hpp index 054844e..8292a2a 100644 --- a/include/ffmpeg_encoder_decoder/decoder.hpp +++ b/include/ffmpeg_encoder_decoder/decoder.hpp @@ -21,8 +21,10 @@ #include #include #include +#include #include #include +#include #include extern "C" { @@ -92,7 +94,7 @@ class Decoder Decoder(); /** - * \brief Destructor. Calls reset(); + * \brief Destructor. */ ~Decoder(); @@ -100,7 +102,11 @@ class Decoder * Test if decoder is initialized. * \return true if the decoder is initialized. */ - bool isInitialized() const { return (codecContext_ != NULL); } + bool isInitialized() const + { + Lock lock(mutex_); + return (codecContext_ != NULL); + } /** * \brief Initializes the decoder for a given codec and libav decoder. @@ -160,36 +166,21 @@ class Decoder * \brief Overrides the default ("Decoder") logger. * \param logger the logger to override the default ("Decoder") with. */ - void setLogger(rclcpp::Logger logger) { logger_ = logger; } - - /** - * \brief Finds all hardware and software decoders for a given codec. - * - * Finds the name of all hardware and software decoders that match - * a certain codec (or encoder). - * \param codec name of the codec, i.e. h264, hevc etc - * \param hw_decoders non-null pointer for returning list of hardware decoders - * \param sw_decoders non-null pointer for returning list of software decoders - */ - static void findDecoders( - const std::string & codec, std::vector * hw_decoders, - std::vector * sw_decoders); - - /** - * \brief Finds all decoders that can decode a given codec. - * - * Finds the name of all hardware and software decoders (combined) - * that match a certain codec (or encoder). - * \param codec name of the codec, i.e. h264, hevc etc - * \return string with comma-separated list of libav decoders - */ - static std::string findDecoders(const std::string & codec); + void setLogger(rclcpp::Logger logger) + { + Lock lock(mutex_); + logger_ = logger; + } /** * \brief Enables or disables performance measurements. Poorly tested, may be broken. * \param p set to true to enable performance debugging. */ - void setMeasurePerformance(bool p) { measurePerformance_ = p; } + void setMeasurePerformance(bool p) + { + Lock lock(mutex_); + measurePerformance_ = p; + } /** * \brief Prints performance timers. 
Poorly tested, may be broken. @@ -201,6 +192,7 @@ class Decoder * \brief resets performance debugging timers. Poorly tested, may be broken. */ void resetTimers(); + /** * \brief adds AVOption setting to list of options to be applied before opening the encoder * \param key name of AVOption to set, e.g. "preset" @@ -208,9 +200,33 @@ class Decoder */ void addAVOption(const std::string & key, const std::string & value) { + Lock lock(mutex_); avOptions_.push_back({key, value}); } + /** + * \brief Finds all hardware and software decoders for a given codec. + * + * Finds the name of all hardware and software decoders that match + * a certain codec (or encoder). + * \param codec name of the codec, i.e. h264, hevc etc + * \param hw_decoders non-null pointer for returning list of hardware decoders + * \param sw_decoders non-null pointer for returning list of software decoders + */ + static void findDecoders( + const std::string & codec, std::vector * hw_decoders, + std::vector * sw_decoders); + + /** + * \brief Finds all decoders that can decode a given codec. + * + * Finds the name of all hardware and software decoders (combined) + * that match a certain codec (or encoder). + * \param codec name of the codec, i.e. h264, hevc etc + * \return string with comma-separated list of libav decoders + */ + static std::string findDecoders(const std::string & codec); + // ------------------- deprecated functions --------------- /** * \deprecated Use findDecoders(codec) instead. @@ -220,17 +236,20 @@ class Decoder getDefaultEncoderToDecoderMap(); private: + using Lock = std::unique_lock; bool doInitDecoder(const std::string & encoding, const std::string & decoder); bool initDecoder(const std::string & encoding, const std::string & decoders); int receiveFrame(); int convertFrameToMessage(const AVFrame * frame, const ImagePtr & image); void setAVOption(const std::string & field, const std::string & value); void setEncoding(const std::string & encoding); + void resetNoLock(); // --------------- variables rclcpp::Logger logger_; Callback callback_; PTSMap ptsToStamp_; std::vector> avOptions_; + mutable std::mutex mutex_; // --- performance analysis bool measurePerformance_{false}; TDiff tdiffTotal_; diff --git a/src/decoder.cpp b/src/decoder.cpp index 0443b69..667ec44 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -27,9 +27,15 @@ namespace ffmpeg_encoder_decoder Decoder::Decoder() : logger_(rclcpp::get_logger("Decoder")) {} -Decoder::~Decoder() { reset(); } +Decoder::~Decoder() { resetNoLock(); } void Decoder::reset() +{ + Lock lock(mutex_); + resetNoLock(); +} + +void Decoder::resetNoLock() { if (codecContext_) { avcodec_free_context(&codecContext_); @@ -55,6 +61,7 @@ void Decoder::reset() void Decoder::setOutputMessageEncoding(const std::string & output_encoding) { + Lock lock(mutex_); RCLCPP_INFO_STREAM(logger_, "forcing output encoding: " << output_encoding); outputMsgEncoding_ = output_encoding; } @@ -80,6 +87,7 @@ void Decoder::setEncoding(const std::string & encoding) bool Decoder::initialize( const std::string & encoding, Callback callback, const std::string & decoder) { + Lock lock(mutex_); callback_ = callback; return (initDecoder(encoding, decoder)); } @@ -282,7 +290,7 @@ bool Decoder::doInitDecoder(const std::string & encoding, const std::string & de outputFrame_ = av_frame_alloc(); } catch (const std::runtime_error & e) { RCLCPP_ERROR_STREAM(logger_, e.what()); - reset(); + resetNoLock(); return (false); } RCLCPP_INFO_STREAM(logger_, "decoding with " << decoder); @@ -291,6 +299,7 @@ bool 
Decoder::doInitDecoder(const std::string & encoding, const std::string & de bool Decoder::flush() { + Lock lock(mutex_); if (!codecContext_) { return (false); } @@ -395,7 +404,8 @@ bool Decoder::decodePacket( const std::string & encoding, const uint8_t * data, size_t size, uint64_t pts, const std::string & frame_id, const rclcpp::Time & stamp) { - if (!isInitialized()) { + Lock lock(mutex_); + if (codecContext_ == nullptr) { RCLCPP_ERROR_STREAM(logger_, "decoder is not initialized!"); return (false); } @@ -439,10 +449,15 @@ bool Decoder::decodePacket( return (rret == AVERROR(EAGAIN)); } -void Decoder::resetTimers() { tdiffTotal_.reset(); } +void Decoder::resetTimers() +{ + Lock lock(mutex_); + tdiffTotal_.reset(); +} void Decoder::printTimers(const std::string & prefix) const { + Lock lock(mutex_); RCLCPP_INFO_STREAM(logger_, prefix << " total decode: " << tdiffTotal_); }
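The locking scheme introduced in this change follows a simple idiom: public entry points such as ``reset()`` take the mutex and delegate to private ``...NoLock`` helpers (e.g. ``resetNoLock()``), while the destructor calls the helper directly, since no other thread may still be using the object at that point. A generic sketch of that idiom (the ``Worker`` class is illustrative only, not part of this package):

```
#include <mutex>

class Worker
{
public:
  void reset()
  {
    std::unique_lock<std::mutex> lock(mutex_);  // public API locks...
    resetNoLock();                              // ...then delegates to the unlocked helper
  }
  ~Worker() { resetNoLock(); }  // destructor: object is no longer shared, no locking needed

private:
  void resetNoLock()
  {
    // free resources here; must never lock mutex_ itself
  }
  mutable std::mutex mutex_;
};
```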