From a9f276f50159dfd5ef54611efe6eb283675e1fec Mon Sep 17 00:00:00 2001 From: Jacob Su Date: Thu, 16 May 2024 19:54:54 +0800 Subject: [PATCH] issue #4052: support h.264 nalu type SEI filter. --- trunk/conf/full.conf | 6 ++ trunk/conf/rtmp2rtc.conf | 1 + trunk/src/app/srs_app_config.cpp | 22 +++++- trunk/src/app/srs_app_config.hpp | 1 + trunk/src/app/srs_app_http_stream.cpp | 2 + trunk/src/app/srs_app_rtc_source.cpp | 35 +++++----- trunk/src/app/srs_app_rtc_source.hpp | 1 + trunk/src/kernel/srs_kernel_codec.cpp | 97 ++++++++++++++++----------- trunk/src/kernel/srs_kernel_codec.hpp | 9 ++- trunk/src/kernel/srs_kernel_error.hpp | 3 +- trunk/src/utest/srs_utest_config.cpp | 3 + 11 files changed, 116 insertions(+), 64 deletions(-) diff --git a/trunk/conf/full.conf b/trunk/conf/full.conf index 7733f04585..81e98f1e2d 100644 --- a/trunk/conf/full.conf +++ b/trunk/conf/full.conf @@ -562,6 +562,12 @@ vhost rtc.vhost.srs.com { # Overwrite by env SRS_VHOST_RTC_KEEP_BFRAME for all vhosts. # default: off keep_bframe off; + # Whether keep the h.264 SEI type NALU packet. + # DJI drone M30T will send many SEI type NALU packet, while + # iOS hardware decoder (Video Toolbox) dislike to feed it + # so many SEI NALU between NonIDR and IDR NALU packets. + # @see https://github.com/ossrs/srs/issues/4052 + keep_avc_nalu_sei on; # The transcode audio bitrate, for RTMP to RTC. # Overwrite by env SRS_VHOST_RTC_OPUS_BITRATE for all vhosts. # [8000, 320000] diff --git a/trunk/conf/rtmp2rtc.conf b/trunk/conf/rtmp2rtc.conf index daa50e2a74..efb070f655 100644 --- a/trunk/conf/rtmp2rtc.conf +++ b/trunk/conf/rtmp2rtc.conf @@ -31,6 +31,7 @@ vhost __defaultVhost__ { rtmp_to_rtc on; # @see https://ossrs.net/lts/zh-cn/docs/v4/doc/webrtc#rtc-to-rtmp rtc_to_rtmp on; + keep_avc_nalu_sei off; } http_remux { enabled on; diff --git a/trunk/src/app/srs_app_config.cpp b/trunk/src/app/srs_app_config.cpp index b7efd889d9..6b0bdb2a4c 100644 --- a/trunk/src/app/srs_app_config.cpp +++ b/trunk/src/app/srs_app_config.cpp @@ -2726,7 +2726,7 @@ srs_error_t SrsConfig::check_normal_config() && m != "bframe" && m != "aac" && m != "stun_timeout" && m != "stun_strict_check" && m != "dtls_role" && m != "dtls_version" && m != "drop_for_pt" && m != "rtc_to_rtmp" && m != "pli_for_rtmp" && m != "rtmp_to_rtc" && m != "keep_bframe" && m != "opus_bitrate" - && m != "aac_bitrate") { + && m != "aac_bitrate" && m != "keep_avc_nalu_sei") { return srs_error_new(ERROR_SYSTEM_CONFIG_INVALID, "illegal vhost.rtc.%s of %s", m.c_str(), vhost->arg0().c_str()); } } @@ -4474,6 +4474,26 @@ bool SrsConfig::get_rtc_keep_bframe(string vhost) return SRS_CONF_PERFER_FALSE(conf->arg0()); } +bool SrsConfig::get_rtc_keep_avc_nalu_sei(std::string vhost) +{ + SRS_OVERWRITE_BY_ENV_BOOL2("srs.vhost.rtc.keep_avc_nalu_sei"); // SRS_VHOST_RTC_KEEP_AVC_NALU_SEI + + static bool DEFAULT = true; + + SrsConfDirective* conf = get_rtc(vhost); + + if (!conf) { + return DEFAULT; + } + + conf = conf->get("keep_avc_nalu_sei"); + if (!conf || conf->arg0().empty()) { + return DEFAULT; + } + + return SRS_CONF_PERFER_TRUE(conf->arg0()); +} + bool SrsConfig::get_rtc_from_rtmp(string vhost) { SRS_OVERWRITE_BY_ENV_BOOL("srs.vhost.rtc.rtmp_to_rtc"); // SRS_VHOST_RTC_RTMP_TO_RTC diff --git a/trunk/src/app/srs_app_config.hpp b/trunk/src/app/srs_app_config.hpp index c6906e5fdf..afa700a0c8 100644 --- a/trunk/src/app/srs_app_config.hpp +++ b/trunk/src/app/srs_app_config.hpp @@ -533,6 +533,7 @@ class SrsConfig SrsConfDirective* get_rtc(std::string vhost); bool get_rtc_enabled(std::string vhost); bool get_rtc_keep_bframe(std::string vhost); + bool get_rtc_keep_avc_nalu_sei(std::string vhost); bool get_rtc_from_rtmp(std::string vhost); srs_utime_t get_rtc_stun_timeout(std::string vhost); bool get_rtc_stun_strict_check(std::string vhost); diff --git a/trunk/src/app/srs_app_http_stream.cpp b/trunk/src/app/srs_app_http_stream.cpp index 181ee14c09..f7f9fb0d7a 100755 --- a/trunk/src/app/srs_app_http_stream.cpp +++ b/trunk/src/app/srs_app_http_stream.cpp @@ -855,6 +855,8 @@ srs_error_t SrsLiveStream::streaming_send_messages(ISrsBufferEncoder* enc, SrsSh if (msg->is_audio()) { err = enc->write_audio(msg->timestamp, msg->payload, msg->size); } else if (msg->is_video()) { + // TODO: drop h.264 flv video tags with NALU SEI here to fix http-flv play error in safari mac. + // @see https://github.com/ossrs/srs/issues/4052 err = enc->write_video(msg->timestamp, msg->payload, msg->size); } else { err = enc->write_metadata(msg->timestamp, msg->payload, msg->size); diff --git a/trunk/src/app/srs_app_rtc_source.cpp b/trunk/src/app/srs_app_rtc_source.cpp index c74b59e9e4..ffae4c4776 100644 --- a/trunk/src/app/srs_app_rtc_source.cpp +++ b/trunk/src/app/srs_app_rtc_source.cpp @@ -739,6 +739,7 @@ SrsRtcRtpBuilder::SrsRtcRtpBuilder(SrsFrameToRtcBridge* bridge, uint32_t assrc, codec_ = new SrsAudioTranscoder(); latest_codec_ = SrsAudioCodecIdForbidden; keep_bframe = false; + keep_avc_nalu_sei = true; merge_nalus = false; meta = new SrsMetaCache(); audio_sequence = 0; @@ -771,8 +772,9 @@ srs_error_t SrsRtcRtpBuilder::initialize(SrsRequest* r) format->try_annexb_first = _srs_config->try_annexb_first(r->vhost); keep_bframe = _srs_config->get_rtc_keep_bframe(req->vhost); + keep_avc_nalu_sei = _srs_config->get_rtc_keep_avc_nalu_sei(req->vhost); merge_nalus = _srs_config->get_rtc_server_merge_nalus(); - srs_trace("RTC bridge from RTMP, keep_bframe=%d, merge_nalus=%d", keep_bframe, merge_nalus); + srs_trace("RTC bridge from RTMP, keep_bframe=%d, keep_avc_nalu_sei=%d, merge_nalus=%d", keep_bframe, keep_avc_nalu_sei, merge_nalus); return err; } @@ -1013,12 +1015,6 @@ srs_error_t SrsRtcRtpBuilder::on_video(SrsSharedPtrMessage* msg) for (int i = 0; i < nn_samples; i++) { SrsSample* sample = samples[i]; - // We always ignore bframe here, if config to discard bframe, - // the bframe flag will not be set. - if (sample->bframe) { - continue; - } - if (sample->size <= kRtpMaxPayloadSize) { if ((err = package_single_nalu(msg, sample, pkts)) != srs_success) { return srs_error_wrap(err, "package single nalu"); @@ -1050,14 +1046,27 @@ srs_error_t SrsRtcRtpBuilder::filter(SrsSharedPtrMessage* msg, SrsFormat* format // Update samples to shared frame. for (int i = 0; i < format->video->nb_samples; ++i) { SrsSample* sample = &format->video->samples[i]; + + if (!keep_avc_nalu_sei && format->vcodec->id == SrsVideoCodecIdAVC) { + SrsAvcNaluType avc_nalu_type; + // TODO: FIXME use static method to parse avc nalu type. + if ((err = SrsVideoFrame::parse_avc_nalu_type(sample, avc_nalu_type)) != srs_success) { + return srs_error_wrap(err, "parse avc nalu_type"); + } + if (avc_nalu_type == SrsAvcNaluTypeSEI) { + // srs_warn("skip avc nalu type SEI, size=%d", sample->size); + continue; + } + } // Because RTC does not support B-frame, so we will drop them. // TODO: Drop B-frame in better way, which not cause picture corruption. - if (!keep_bframe) { - if ((err = sample->parse_bframe()) != srs_success) { + if (!keep_bframe && format->vcodec->id == SrsVideoCodecIdAVC) { + bool is_b_frame; + if ((err = SrsVideoFrame::parse_avc_b_frame(sample, is_b_frame)) != srs_success) { return srs_error_wrap(err, "parse bframe"); } - if (sample->bframe) { + if (is_b_frame) { continue; } } @@ -1137,12 +1146,6 @@ srs_error_t SrsRtcRtpBuilder::package_nalus(SrsSharedPtrMessage* msg, const vect for (int i = 0; i < (int)samples.size(); i++) { SrsSample* sample = samples[i]; - // We always ignore bframe here, if config to discard bframe, - // the bframe flag will not be set. - if (sample->bframe) { - continue; - } - if (!sample->size) { continue; } diff --git a/trunk/src/app/srs_app_rtc_source.hpp b/trunk/src/app/srs_app_rtc_source.hpp index 02e786cf28..75777a38f3 100644 --- a/trunk/src/app/srs_app_rtc_source.hpp +++ b/trunk/src/app/srs_app_rtc_source.hpp @@ -262,6 +262,7 @@ class SrsRtcRtpBuilder SrsAudioCodecId latest_codec_; SrsAudioTranscoder* codec_; bool keep_bframe; + bool keep_avc_nalu_sei; bool merge_nalus; uint16_t audio_sequence; uint16_t video_sequence; diff --git a/trunk/src/kernel/srs_kernel_codec.cpp b/trunk/src/kernel/srs_kernel_codec.cpp index 9afe07867e..752083e81e 100644 --- a/trunk/src/kernel/srs_kernel_codec.cpp +++ b/trunk/src/kernel/srs_kernel_codec.cpp @@ -504,63 +504,23 @@ SrsSample::SrsSample() { size = 0; bytes = NULL; - bframe = false; } SrsSample::SrsSample(char* b, int s) { size = s; bytes = b; - bframe = false; } SrsSample::~SrsSample() { } -srs_error_t SrsSample::parse_bframe() -{ - srs_error_t err = srs_success; - - uint8_t header = bytes[0]; - SrsAvcNaluType nal_type = (SrsAvcNaluType)(header & kNalTypeMask); - - if (nal_type != SrsAvcNaluTypeNonIDR && nal_type != SrsAvcNaluTypeDataPartitionA && nal_type != SrsAvcNaluTypeIDR) { - return err; - } - - SrsBuffer* stream = new SrsBuffer(bytes, size); - SrsAutoFree(SrsBuffer, stream); - - // Skip nalu header. - stream->skip(1); - - SrsBitBuffer bitstream(stream); - int32_t first_mb_in_slice = 0; - if ((err = srs_avc_nalu_read_uev(&bitstream, first_mb_in_slice)) != srs_success) { - return srs_error_wrap(err, "nalu read uev"); - } - - int32_t slice_type_v = 0; - if ((err = srs_avc_nalu_read_uev(&bitstream, slice_type_v)) != srs_success) { - return srs_error_wrap(err, "nalu read uev"); - } - SrsAvcSliceType slice_type = (SrsAvcSliceType)slice_type_v; - - if (slice_type == SrsAvcSliceTypeB || slice_type == SrsAvcSliceTypeB1) { - bframe = true; - srs_verbose("nal_type=%d, slice type=%d", nal_type, slice_type); - } - - return err; -} - SrsSample* SrsSample::copy() { SrsSample* p = new SrsSample(); p->bytes = bytes; p->size = size; - p->bframe = bframe; return p; } @@ -655,7 +615,6 @@ srs_error_t SrsFrame::add_sample(char* bytes, int size) SrsSample* sample = &samples[nb_samples++]; sample->bytes = bytes; sample->size = size; - sample->bframe = false; return err; } @@ -739,6 +698,62 @@ SrsVideoCodecConfig* SrsVideoFrame::vcodec() return (SrsVideoCodecConfig*)codec; } +srs_error_t SrsVideoFrame::parse_avc_nalu_type(const SrsSample* sample, SrsAvcNaluType& avc_nalu_type) +{ + if (sample == NULL || sample->size < 1) { + return srs_error_new(ERROR_AVC_NALU_EMPTY, "empty nalu"); + } + srs_error_t err = srs_success; + + uint8_t header = sample->bytes[0]; + avc_nalu_type = (SrsAvcNaluType)(header & kNalTypeMask); + + return err; +} + +srs_error_t SrsVideoFrame::parse_avc_b_frame(const SrsSample* sample, bool& is_b_frame) +{ + if (sample == NULL || sample->size < 1) { + return srs_error_new(ERROR_AVC_NALU_EMPTY, "empty nalu"); + } + + srs_error_t err = srs_success; + SrsAvcNaluType nalu_type; + if ((err = parse_avc_nalu_type(sample, nalu_type)) != srs_success) { + return srs_error_wrap(err, "parse avc nalu type error"); + } + + if (nalu_type != SrsAvcNaluTypeNonIDR && nalu_type != SrsAvcNaluTypeDataPartitionA && nalu_type != SrsAvcNaluTypeIDR) { + is_b_frame = false; + return err; + } + + SrsBuffer* stream = new SrsBuffer(sample->bytes, sample->size); + SrsAutoFree(SrsBuffer, stream); + + // Skip nalu header. + stream->skip(1); + + SrsBitBuffer bitstream(stream); + int32_t first_mb_in_slice = 0; + if ((err = srs_avc_nalu_read_uev(&bitstream, first_mb_in_slice)) != srs_success) { + return srs_error_wrap(err, "nalu read uev"); + } + + int32_t slice_type_v = 0; + if ((err = srs_avc_nalu_read_uev(&bitstream, slice_type_v)) != srs_success) { + return srs_error_wrap(err, "nalu read uev"); + } + SrsAvcSliceType slice_type = (SrsAvcSliceType)slice_type_v; + + is_b_frame = slice_type == SrsAvcSliceTypeB || slice_type == SrsAvcSliceTypeB1; + if (is_b_frame) { + srs_verbose("nalu_type=%d, slice type=%d", nalu_type, slice_type); + } + + return err; +} + SrsFormat::SrsFormat() { acodec = NULL; diff --git a/trunk/src/kernel/srs_kernel_codec.hpp b/trunk/src/kernel/srs_kernel_codec.hpp index 5f8fc934af..a1462d3328 100644 --- a/trunk/src/kernel/srs_kernel_codec.hpp +++ b/trunk/src/kernel/srs_kernel_codec.hpp @@ -1120,7 +1120,7 @@ std::string srs_hevc_level2str(SrsHevcLevel level); /** * A sample is the unit of frame. - * It's a NALU for H.264. + * It's a NALU for H.264, H.265. * It's the whole AAC raw data for AAC. * @remark Neither SPS/PPS or ASC is sample unit, it's codec sequence header. */ @@ -1131,15 +1131,11 @@ class SrsSample int size; // The ptr of unit, user must free it. char* bytes; - // Whether is B frame. - bool bframe; public: SrsSample(); SrsSample(char* b, int s); ~SrsSample(); public: - // If we need to know whether sample is bframe, we have to parse the NALU payload. - srs_error_t parse_bframe(); // Copy sample, share the bytes pointer. SrsSample* copy(); }; @@ -1322,6 +1318,9 @@ class SrsVideoFrame : public SrsFrame virtual srs_error_t add_sample(char* bytes, int size); public: virtual SrsVideoCodecConfig* vcodec(); +public: + static srs_error_t parse_avc_nalu_type(const SrsSample* sample, SrsAvcNaluType& avc_nalu_type); + static srs_error_t parse_avc_b_frame(const SrsSample* sample, bool& is_b_frame); }; /** diff --git a/trunk/src/kernel/srs_kernel_error.hpp b/trunk/src/kernel/srs_kernel_error.hpp index 7c2a766f27..71477b0c2b 100644 --- a/trunk/src/kernel/srs_kernel_error.hpp +++ b/trunk/src/kernel/srs_kernel_error.hpp @@ -276,7 +276,8 @@ XX(ERROR_HEVC_DISABLED , 3098, "HevcDisabled", "HEVC is disabled") \ XX(ERROR_HEVC_DECODE_ERROR , 3099, "HevcDecode", "HEVC decode av stream failed") \ XX(ERROR_MP4_HVCC_CHANGE , 3100, "Mp4HvcCChange", "MP4 does not support video HvcC change") \ - XX(ERROR_HEVC_API_NO_PREFIXED , 3101, "HevcAnnexbPrefix", "No annexb prefix for HEVC decoder") + XX(ERROR_HEVC_API_NO_PREFIXED , 3101, "HevcAnnexbPrefix", "No annexb prefix for HEVC decoder") \ + XX(ERROR_AVC_NALU_EMPTY , 3102, "AvcNalu", "AVC NALU is empty") /**************************************************/ /* HTTP/StreamConverter protocol error. */ diff --git a/trunk/src/utest/srs_utest_config.cpp b/trunk/src/utest/srs_utest_config.cpp index 640364ce37..07a0fd04b7 100644 --- a/trunk/src/utest/srs_utest_config.cpp +++ b/trunk/src/utest/srs_utest_config.cpp @@ -4405,6 +4405,9 @@ VOID TEST(ConfigEnvTest, CheckEnvValuesVhostRtc) SrsSetEnvConfig(rtc_keep_bframe, "SRS_VHOST_RTC_KEEP_BFRAME", "on"); EXPECT_TRUE(conf.get_rtc_keep_bframe("__defaultVhost__")); + + SrsSetEnvConfig(rtc_keep_avc_nalu_sei, "SRS_VHOST_RTC_KEEP_AVC_NALU_SEI", "off"); + EXPECT_FALSE(conf.get_rtc_keep_avc_nalu_sei("__defaultVhost__")); } if (true) {