From f0fa5bb02e7af2a61e328ba9ff899158c1af90c5 Mon Sep 17 00:00:00 2001 From: cudawarped <12133430+cudawarped@users.noreply.github.com> Date: Thu, 2 Mar 2023 12:51:18 +0200 Subject: [PATCH] cudacodec: add capacity to reconfigure decoder on resolution change --- .../cudacodec/include/opencv2/cudacodec.hpp | 18 ++- modules/cudacodec/src/frame_queue.cpp | 28 +++- modules/cudacodec/src/frame_queue.hpp | 9 ++ modules/cudacodec/src/video_decoder.cpp | 57 +++++++- modules/cudacodec/src/video_decoder.hpp | 10 +- modules/cudacodec/src/video_parser.cpp | 124 +++++++++-------- modules/cudacodec/src/video_parser.hpp | 4 +- modules/cudacodec/src/video_reader.cpp | 10 +- modules/cudacodec/test/test_video.cpp | 131 +++++++++++++++++- 9 files changed, 315 insertions(+), 76 deletions(-) diff --git a/modules/cudacodec/include/opencv2/cudacodec.hpp b/modules/cudacodec/include/opencv2/cudacodec.hpp index d61673cd71f..c1c0b6f2e13 100644 --- a/modules/cudacodec/include/opencv2/cudacodec.hpp +++ b/modules/cudacodec/include/opencv2/cudacodec.hpp @@ -305,16 +305,27 @@ enum DeinterlaceMode Adaptive = 2 }; +/** @brief Output format for a decoded frame when the resolution of the source is reduced by the encoder. In all cases the size of the output frame remains the same. +* * @param Default Use the approach adopted by cv::VideoCapture, i.e. maintain the same frame size by placing the smaller output in the top left corner. +* @param Qos Maintain the same frame resolution by upscaling to the original resolution to seamlessly process streams produced by servers that are adhering to Quality of Service constraints. +* */ +enum ResolutionChangeMode +{ + Default = 0, + Qos = 1 +}; + /** @brief Struct providing information about video file format. : */ struct CV_EXPORTS_W_SIMPLE FormatInfo { - CV_WRAP FormatInfo() : nBitDepthMinus8(-1), ulWidth(0), ulHeight(0), width(0), height(0), ulMaxWidth(0), ulMaxHeight(0), valid(false), + CV_WRAP FormatInfo() : nBitDepthMinus8(-1), nBitDepthChromaMinus8(-1), ulWidth(0), ulHeight(0), width(0), height(0), ulMaxWidth(0), ulMaxHeight(0), valid(false), fps(0), ulNumDecodeSurfaces(0) {}; CV_PROP_RW Codec codec; CV_PROP_RW ChromaFormat chromaFormat; CV_PROP_RW int nBitDepthMinus8; + CV_PROP_RW int nBitDepthChromaMinus8; CV_PROP_RW int ulWidth;//!< Coded sequence width in pixels. CV_PROP_RW int ulHeight;//!< Coded sequence height in pixels. CV_PROP_RW int width;//!< Width of the decoded frame returned by nextFrame(frame). @@ -329,6 +340,7 @@ struct CV_EXPORTS_W_SIMPLE FormatInfo CV_PROP_RW cv::Size targetSz;//!< Post-processed size of the output frame. CV_PROP_RW cv::Rect srcRoi;//!< Region of interest decoded from video source. CV_PROP_RW cv::Rect targetRoi;//!< Region of interest in the output frame containing the decoded frame. + CV_PROP_RW ResolutionChangeMode resChangeMode;//!< Output format for a decoded frame when the resolution of the source is reduced by the encoder. }; /** @brief cv::cudacodec::VideoReader generic properties identifier. @@ -533,9 +545,10 @@ but it cannot go below the number determined by NVDEC. @param srcRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) decoded from video source, defaults to the full frame. @param targetRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) within the output frame to copy and resize the decoded frame to, defaults to the full frame. +@param resChangeMode Output mode to use when the resolution of the source is changed by the encoder, ignored when targetRoi is specified. */ struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams { - CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0) {}; + CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0), resChangeMode(ResolutionChangeMode::Default) {}; CV_PROP_RW bool udpSource; CV_PROP_RW bool allowFrameDrop; CV_PROP_RW int minNumDecodeSurfaces; @@ -543,6 +556,7 @@ struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams { CV_PROP_RW cv::Size targetSz; CV_PROP_RW cv::Rect srcRoi; CV_PROP_RW cv::Rect targetRoi; + CV_PROP_RW ResolutionChangeMode resChangeMode; }; /** @brief Creates video reader. diff --git a/modules/cudacodec/src/frame_queue.cpp b/modules/cudacodec/src/frame_queue.cpp index d73e04eb180..dafe5bb295e 100644 --- a/modules/cudacodec/src/frame_queue.cpp +++ b/modules/cudacodec/src/frame_queue.cpp @@ -55,14 +55,29 @@ cv::cudacodec::detail::FrameQueue::~FrameQueue() { void cv::cudacodec::detail::FrameQueue::init(const int _maxSz) { AutoLock autoLock(mtx_); - if (isFrameInUse_) - return; + if (isFrameInUse_) return; maxSz = _maxSz; displayQueue_ = std::vector(maxSz, CUVIDPARSERDISPINFO()); isFrameInUse_ = new volatile int[maxSz]; std::memset((void*)isFrameInUse_, 0, sizeof(*isFrameInUse_) * maxSz); } +void cv::cudacodec::detail::FrameQueue::resize(const int newSz) { + if (newSz == maxSz) return; + if (!isFrameInUse_) return init(newSz); + AutoLock autoLock(mtx_); + const int maxSzOld = maxSz; maxSz = newSz; + const auto displayQueueOld = displayQueue_; + displayQueue_ = std::vector(maxSz, CUVIDPARSERDISPINFO()); + for (int i = readPosition_; i < readPosition_ + framesInQueue_; i++) + displayQueue_.at(i % displayQueue_.size()) = displayQueueOld.at(i % displayQueueOld.size()); + const volatile int* const isFrameInUseOld = isFrameInUse_; + isFrameInUse_ = new volatile int[maxSz]; + std::memset((void*)isFrameInUse_, 0, sizeof(*isFrameInUse_) * maxSz); + std::memcpy((void*)isFrameInUse_, (void*)isFrameInUseOld, sizeof(*isFrameInUseOld) * min(maxSz,maxSzOld)); + delete[] isFrameInUseOld; +} + bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex, const bool allowFrameDrop) { while (isInUse(pictureIndex)) @@ -79,6 +94,15 @@ bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex return true; } +bool cv::cudacodec::detail::FrameQueue::waitUntilEmpty() { + while (framesInQueue_) { + Thread::sleep(1); + if (isEndOfDecode()) + return false; + } + return true; +} + void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams, const std::vector rawPackets) { // Mark the frame as 'in-use' so we don't re-use it for decoding until it is no longer needed diff --git a/modules/cudacodec/src/frame_queue.hpp b/modules/cudacodec/src/frame_queue.hpp index 840b23c5dde..4eb17809e0d 100644 --- a/modules/cudacodec/src/frame_queue.hpp +++ b/modules/cudacodec/src/frame_queue.hpp @@ -66,6 +66,12 @@ class FrameQueue ~FrameQueue(); void init(const int _maxSz); + // Resize the current frame queue keeping any existing queued values - must only + // be called in the same thread as enqueue. + // Parameters: + // newSz - new size of the frame queue. + void resize(const int newSz); + void endDecode() { endOfDecode_ = true; } bool isEndOfDecode() const { return endOfDecode_ != 0;} @@ -77,6 +83,8 @@ class FrameQueue // to ensure a frame is available. bool waitUntilFrameAvailable(int pictureIndex, const bool allowFrameDrop = false); + bool waitUntilEmpty(); + void enqueue(const CUVIDPARSERDISPINFO* picParams, const std::vector rawPackets); // Deque the next frame. @@ -97,6 +105,7 @@ class FrameQueue bool dequeueUntil(const int pictureIndex); void releaseFrame(const CUVIDPARSERDISPINFO& picParams) { isFrameInUse_[picParams.picture_index] = 0; } + int getMaxSz() { return maxSz; } private: bool isInUse(int pictureIndex) const { return isFrameInUse_[pictureIndex] != 0; } diff --git a/modules/cudacodec/src/video_decoder.cpp b/modules/cudacodec/src/video_decoder.cpp index f828b08c158..2558b6d2fb3 100644 --- a/modules/cudacodec/src/video_decoder.cpp +++ b/modules/cudacodec/src/video_decoder.cpp @@ -124,8 +124,8 @@ void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat) cuSafeCall(cuvidGetDecoderCaps(&decodeCaps)); cuSafeCall(cuCtxPopCurrent(NULL)); if (!(decodeCaps.bIsSupported && (decodeCaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12)))){ - CV_Error(Error::StsUnsupportedFormat, "Video source is not supported by hardware video decoder"); CV_LOG_ERROR(NULL, "Video source is not supported by hardware video decoder."); + CV_Error(Error::StsUnsupportedFormat, "Video source is not supported by hardware video decoder"); } CV_Assert(videoFormat.ulWidth >= decodeCaps.nMinWidth && videoFormat.ulHeight >= decodeCaps.nMinHeight && @@ -162,6 +162,61 @@ void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat) cuSafeCall(cuCtxPushCurrent(ctx_)); cuSafeCall(cuvidCreateDecoder(&decoder_, &createInfo_)); cuSafeCall(cuCtxPopCurrent(NULL)); + inited_ = true; +} + +int cv::cudacodec::detail::VideoDecoder::reconfigure(const FormatInfo& videoFormat) { + if (videoFormat.nBitDepthMinus8 != videoFormat_.nBitDepthMinus8 || videoFormat.nBitDepthChromaMinus8 != videoFormat_.nBitDepthChromaMinus8) { + CV_LOG_ERROR(NULL, "Reconfigure Not supported for bit depth change"); + CV_Error(Error::StsUnsupportedFormat, "Reconfigure Not supported for bit depth change"); + } + + if (videoFormat.chromaFormat != videoFormat_.chromaFormat) { + CV_LOG_ERROR(NULL, "Reconfigure Not supported for chroma format change"); + CV_Error(Error::StsUnsupportedFormat, "Reconfigure Not supported for chroma format change"); + } + + const bool decodeResChange = !(videoFormat.ulWidth == videoFormat_.ulWidth && videoFormat.ulHeight == videoFormat_.ulHeight); + + if ((videoFormat.ulWidth > videoFormat_.ulMaxWidth) || (videoFormat.ulHeight > videoFormat_.ulMaxHeight)) { + // For VP9, let driver handle the change if new width/height > maxwidth/maxheight + if (videoFormat.codec != Codec::VP9) { + CV_LOG_ERROR(NULL, "Reconfigure Not supported when width/height > maxwidth/maxheight"); + CV_Error(Error::StsUnsupportedFormat, "Reconfigure Not supported when width/height > maxwidth/maxheight"); + } + } + + { + AutoLock autoLock(mtx_); + videoFormat_.ulNumDecodeSurfaces = videoFormat.ulNumDecodeSurfaces; + videoFormat_.ulWidth = videoFormat.ulWidth; + videoFormat_.ulHeight = videoFormat.ulHeight; + videoFormat_.targetRoi = videoFormat.targetRoi; + } + + if (!decodeResChange) + return 1; + + CUVIDRECONFIGUREDECODERINFO reconfigParams = { 0 }; + reconfigParams.ulWidth = videoFormat_.ulWidth; + reconfigParams.ulHeight = videoFormat_.ulHeight; + reconfigParams.display_area.left = videoFormat_.displayArea.x; + reconfigParams.display_area.right = videoFormat_.displayArea.x + videoFormat_.displayArea.width; + reconfigParams.display_area.top = videoFormat_.displayArea.y; + reconfigParams.display_area.bottom = videoFormat_.displayArea.y + videoFormat_.displayArea.height; + reconfigParams.ulTargetWidth = videoFormat_.width; + reconfigParams.ulTargetHeight = videoFormat_.height; + reconfigParams.target_rect.left = videoFormat_.targetRoi.x; + reconfigParams.target_rect.right = videoFormat_.targetRoi.x + videoFormat_.targetRoi.width; + reconfigParams.target_rect.top = videoFormat_.targetRoi.y; + reconfigParams.target_rect.bottom = videoFormat_.targetRoi.y + videoFormat_.targetRoi.height; + reconfigParams.ulNumDecodeSurfaces = videoFormat_.ulNumDecodeSurfaces; + + cuSafeCall(cuCtxPushCurrent(ctx_)); + cuSafeCall(cuvidReconfigureDecoder(decoder_, &reconfigParams)); + cuSafeCall(cuCtxPopCurrent(NULL)); + CV_LOG_INFO(NULL, "Reconfiguring Decoder"); + return videoFormat_.ulNumDecodeSurfaces; } void cv::cudacodec::detail::VideoDecoder::release() diff --git a/modules/cudacodec/src/video_decoder.hpp b/modules/cudacodec/src/video_decoder.hpp index 76d731f2078..96ec853c756 100644 --- a/modules/cudacodec/src/video_decoder.hpp +++ b/modules/cudacodec/src/video_decoder.hpp @@ -49,11 +49,12 @@ namespace cv { namespace cudacodec { namespace detail { class VideoDecoder { public: - VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, cv::Size targetSz, cv::Rect srcRoi, cv::Rect targetRoi, CUcontext ctx, CUvideoctxlock lock) : + VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, cv::Size targetSz, cv::Rect srcRoi, cv::Rect targetRoi, const ResolutionChangeMode resChangeMode, CUcontext ctx, CUvideoctxlock lock) : ctx_(ctx), lock_(lock), decoder_(0) { videoFormat_.codec = codec; videoFormat_.ulNumDecodeSurfaces = minNumDecodeSurfaces; + videoFormat_.resChangeMode = resChangeMode; // alignment enforced by nvcuvid, likely due to chroma subsampling videoFormat_.targetSz.width = targetSz.width - targetSz.width % 2; videoFormat_.targetSz.height = targetSz.height - targetSz.height % 2; videoFormat_.srcRoi.x = srcRoi.x - srcRoi.x % 4; videoFormat_.srcRoi.width = srcRoi.width - srcRoi.width % 4; @@ -68,14 +69,16 @@ class VideoDecoder } void create(const FormatInfo& videoFormat); + int reconfigure(const FormatInfo& videoFormat); void release(); - // Get the code-type currently used. + // Get the codec-type currently used. cudaVideoCodec codec() const { return static_cast(videoFormat_.codec); } int nDecodeSurfaces() const { return videoFormat_.ulNumDecodeSurfaces; } cv::Size getTargetSz() const { return videoFormat_.targetSz; } cv::Rect getSrcRoi() const { return videoFormat_.srcRoi; } cv::Rect getTargetRoi() const { return videoFormat_.targetRoi; } + ResolutionChangeMode getResChangeMode() const { return videoFormat_.resChangeMode; } unsigned long frameWidth() const { return videoFormat_.ulWidth; } unsigned long frameHeight() const { return videoFormat_.ulHeight; } @@ -84,6 +87,8 @@ class VideoDecoder unsigned long targetWidth() { return videoFormat_.width; } unsigned long targetHeight() { return videoFormat_.height; } + bool inited() { return inited_; } + cudaVideoChromaFormat chromaFormat() const { return static_cast(videoFormat_.chromaFormat); } int nBitDepthMinus8() const { return videoFormat_.nBitDepthMinus8; } @@ -114,6 +119,7 @@ class VideoDecoder CUvideodecoder decoder_ = 0; FormatInfo videoFormat_ = {}; Mutex mtx_; + bool inited_ = false; }; }}} diff --git a/modules/cudacodec/src/video_parser.cpp b/modules/cudacodec/src/video_parser.cpp index 8bccd065a8d..09abc3135c6 100644 --- a/modules/cudacodec/src/video_parser.cpp +++ b/modules/cudacodec/src/video_parser.cpp @@ -68,6 +68,7 @@ bool cv::cudacodec::detail::VideoParser::parseVideoData(const unsigned char* dat CUVIDSOURCEDATAPACKET packet; std::memset(&packet, 0, sizeof(CUVIDSOURCEDATAPACKET)); + packet.flags = CUVID_PKT_TIMESTAMP; if (endOfStream) packet.flags |= CUVID_PKT_ENDOFSTREAM; @@ -107,73 +108,86 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa thiz->unparsedPackets_ = 0; - if (format->codec != thiz->videoDecoder_->codec() || - format->coded_width != thiz->videoDecoder_->frameWidth() || - format->coded_height != thiz->videoDecoder_->frameHeight() || - format->chroma_format != thiz->videoDecoder_->chromaFormat()|| - format->bit_depth_luma_minus8 != thiz->videoDecoder_->nBitDepthMinus8() || - format->min_num_decode_surfaces != thiz->videoDecoder_->nDecodeSurfaces()) + FormatInfo newFormat; + newFormat.codec = static_cast(format->codec); + newFormat.chromaFormat = static_cast(format->chroma_format); + newFormat.nBitDepthMinus8 = format->bit_depth_luma_minus8; + newFormat.nBitDepthChromaMinus8 = format->bit_depth_chroma_minus8; + newFormat.ulWidth = format->coded_width; + newFormat.ulHeight = format->coded_height; + newFormat.fps = format->frame_rate.numerator / static_cast(format->frame_rate.denominator); + newFormat.targetSz = thiz->videoDecoder_->getTargetSz(); + newFormat.width = newFormat.targetSz.width ? newFormat.targetSz.width : format->coded_width; + newFormat.height = newFormat.targetSz.height ? newFormat.targetSz.height : format->coded_height; + newFormat.srcRoi = thiz->videoDecoder_->getSrcRoi(); + if (newFormat.srcRoi.empty()) { + format->display_area.right = format->coded_width; + format->display_area.bottom = format->coded_height; + newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom)); + } + else + newFormat.displayArea = newFormat.srcRoi; + newFormat.targetRoi = thiz->videoDecoder_->getTargetRoi(); + newFormat.resChangeMode = thiz->videoDecoder_->getResChangeMode(); + newFormat.ulNumDecodeSurfaces = min(!thiz->allowFrameDrop_ ? max(thiz->videoDecoder_->nDecodeSurfaces(), static_cast(format->min_num_decode_surfaces)) : + format->min_num_decode_surfaces * 2, 32); + if (format->progressive_sequence) + newFormat.deinterlaceMode = Weave; + else + newFormat.deinterlaceMode = Adaptive; + int maxW = 0, maxH = 0; + // AV1 has max width/height of sequence in sequence header + if (format->codec == cudaVideoCodec_AV1 && format->seqhdr_data_length > 0) { - FormatInfo newFormat; - newFormat.codec = static_cast(format->codec); - newFormat.chromaFormat = static_cast(format->chroma_format); - newFormat.nBitDepthMinus8 = format->bit_depth_luma_minus8; - newFormat.ulWidth = format->coded_width; - newFormat.ulHeight = format->coded_height; - newFormat.fps = format->frame_rate.numerator / static_cast(format->frame_rate.denominator); - newFormat.targetSz = thiz->videoDecoder_->getTargetSz(); - newFormat.width = newFormat.targetSz.width ? newFormat.targetSz.width : format->coded_width; - newFormat.height = newFormat.targetSz.height ? newFormat.targetSz.height : format->coded_height; - newFormat.srcRoi = thiz->videoDecoder_->getSrcRoi(); - if (newFormat.srcRoi.empty()) { - format->display_area.right = format->coded_width; - format->display_area.bottom = format->coded_height; - newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom)); - } - else - newFormat.displayArea = newFormat.srcRoi; - newFormat.targetRoi = thiz->videoDecoder_->getTargetRoi(); - newFormat.ulNumDecodeSurfaces = min(!thiz->allowFrameDrop_ ? max(thiz->videoDecoder_->nDecodeSurfaces(), static_cast(format->min_num_decode_surfaces)) : - format->min_num_decode_surfaces * 2, 32); - if (format->progressive_sequence) - newFormat.deinterlaceMode = Weave; - else - newFormat.deinterlaceMode = Adaptive; - int maxW = 0, maxH = 0; - // AV1 has max width/height of sequence in sequence header - if (format->codec == cudaVideoCodec_AV1 && format->seqhdr_data_length > 0) - { - CUVIDEOFORMATEX* vidFormatEx = (CUVIDEOFORMATEX*)format; - maxW = vidFormatEx->av1.max_width; - maxH = vidFormatEx->av1.max_height; + CUVIDEOFORMATEX* vidFormatEx = (CUVIDEOFORMATEX*)format; + maxW = vidFormatEx->av1.max_width; + maxH = vidFormatEx->av1.max_height; + } + if (maxW < (int)format->coded_width) + maxW = format->coded_width; + if (maxH < (int)format->coded_height) + maxH = format->coded_height; + newFormat.ulMaxWidth = maxW; + newFormat.ulMaxHeight = maxH; + if(newFormat.ulNumDecodeSurfaces != thiz->frameQueue_->getMaxSz()) + thiz->frameQueue_->resize(newFormat.ulNumDecodeSurfaces); + try + { + if (thiz->videoDecoder_->inited()) { + if (newFormat.targetRoi.empty() && newFormat.resChangeMode == ResolutionChangeMode::Default) { + newFormat.targetRoi.x = 0; + newFormat.targetRoi.width = newFormat.ulWidth; + newFormat.targetRoi.y = 0; + newFormat.targetRoi.height = newFormat.ulHeight; + } + thiz->reconfigInfo = newFormat; + thiz->reconfigureDecoder = true; + return newFormat.ulNumDecodeSurfaces; } - if (maxW < (int)format->coded_width) - maxW = format->coded_width; - if (maxH < (int)format->coded_height) - maxH = format->coded_height; - newFormat.ulMaxWidth = maxW; - newFormat.ulMaxHeight = maxH; - thiz->frameQueue_->init(newFormat.ulNumDecodeSurfaces); - try - { + else { thiz->videoDecoder_->release(); thiz->videoDecoder_->create(newFormat); - } - catch (const cv::Exception&) - { - CV_LOG_ERROR(NULL, "Attempt to reconfigure Nvidia decoder failed!"); - thiz->hasError_ = true; - return false; + return thiz->videoDecoder_->nDecodeSurfaces(); } } - - return thiz->videoDecoder_->nDecodeSurfaces(); + catch (const cv::Exception&) + { + CV_LOG_ERROR(NULL, "Attempt to configure Nvidia decoder failed!"); + thiz->hasError_ = true; + return 0; + } } int CUDAAPI cv::cudacodec::detail::VideoParser::HandlePictureDecode(void* userData, CUVIDPICPARAMS* picParams) { VideoParser* thiz = static_cast(userData); + if (thiz->reconfigureDecoder) { + thiz->reconfigureDecoder = false; + thiz->frameQueue_->waitUntilEmpty(); + thiz->videoDecoder_->reconfigure(thiz->reconfigInfo); + } + thiz->unparsedPackets_ = 0; bool isFrameAvailable = thiz->frameQueue_->waitUntilFrameAvailable(picParams->CurrPicIdx, thiz->allowFrameDrop_); diff --git a/modules/cudacodec/src/video_parser.hpp b/modules/cudacodec/src/video_parser.hpp index 28159a9b98a..9853b4aa8b0 100644 --- a/modules/cudacodec/src/video_parser.hpp +++ b/modules/cudacodec/src/video_parser.hpp @@ -76,10 +76,10 @@ class VideoParser std::vector currentFramePackets; volatile bool hasError_ = false; bool allowFrameDrop_ = false; + bool reconfigureDecoder = false; + FormatInfo reconfigInfo; // Called when the decoder encounters a video format change (or initial sequence header) - // This particular implementation of the callback returns 0 in case the video format changes - // to something different than the original format. Returning 0 causes a stop of the app. static int CUDAAPI HandleVideoSequence(void* pUserData, CUVIDEOFORMAT* pFormat); // Called by the video parser to decode a single picture diff --git a/modules/cudacodec/src/video_reader.cpp b/modules/cudacodec/src/video_reader.cpp index a566bd4de71..677519f961b 100644 --- a/modules/cudacodec/src/video_reader.cpp +++ b/modules/cudacodec/src/video_reader.cpp @@ -88,7 +88,7 @@ namespace { public: explicit VideoReaderImpl(const Ptr& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false, - const Size targetSz = Size(), const Rect srcRoi = Rect(), const Rect targetRoi = Rect()); + const Size targetSz = Size(), const Rect srcRoi = Rect(), const Rect targetRoi = Rect(), const ResolutionChangeMode resChangeMode = ResolutionChangeMode::Default); ~VideoReaderImpl(); bool nextFrame(GpuMat& frame, Stream& stream) CV_OVERRIDE; @@ -134,7 +134,7 @@ namespace } VideoReaderImpl::VideoReaderImpl(const Ptr& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource, - const Size targetSz, const Rect srcRoi, const Rect targetRoi) : + const Size targetSz, const Rect srcRoi, const Rect targetRoi, const ResolutionChangeMode resChangeMode) : videoSource_(source), lock_(0) { @@ -146,7 +146,7 @@ namespace cuSafeCall( cuCtxGetCurrent(&ctx) ); cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) ); frameQueue_.reset(new FrameQueue()); - videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, targetSz, srcRoi, targetRoi, ctx, lock_)); + videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, targetSz, srcRoi, targetRoi, resChangeMode, ctx, lock_)); videoParser_.reset(new VideoParser(videoDecoder_, frameQueue_, allowFrameDrop, udpSource)); videoSource_->setVideoParser(videoParser_); videoSource_->start(); @@ -370,14 +370,14 @@ Ptr cv::cudacodec::createVideoReader(const String& filename, const } return makePtr(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz, - params.srcRoi, params.targetRoi); + params.srcRoi, params.targetRoi, params.resChangeMode); } Ptr cv::cudacodec::createVideoReader(const Ptr& source, const VideoReaderInitParams params) { Ptr videoSource(new RawVideoSourceWrapper(source, params.rawMode)); return makePtr(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz, - params.srcRoi, params.targetRoi); + params.srcRoi, params.targetRoi, params.resChangeMode); } #endif // HAVE_NVCUVID diff --git a/modules/cudacodec/test/test_video.cpp b/modules/cudacodec/test/test_video.cpp index b9b4e9f25c6..83825a30aba 100644 --- a/modules/cudacodec/test/test_video.cpp +++ b/modules/cudacodec/test/test_video.cpp @@ -62,6 +62,14 @@ PARAM_TEST_CASE(Video, cv::cuda::DeviceInfo, std::string) { }; +PARAM_TEST_CASE(ReconfigureDecoderWithScaling, cv::cuda::DeviceInfo, cudacodec::ResolutionChangeMode) +{ +}; + +PARAM_TEST_CASE(ReconfigureDecoder, cv::cuda::DeviceInfo, cudacodec::ResolutionChangeMode, int) +{ +}; + PARAM_TEST_CASE(VideoReadRaw, cv::cuda::DeviceInfo, std::string) { }; @@ -181,6 +189,14 @@ CUDA_TEST_P(CheckKeyFrame, Reader) } } +void ForceAlignment(Rect& srcRoi, Rect& targetRoi, Size& targetSz) { + targetSz.width = targetSz.width - targetSz.width % 2; targetSz.height = targetSz.height - targetSz.height % 2; + srcRoi.x = srcRoi.x - srcRoi.x % 4; srcRoi.width = srcRoi.width - srcRoi.width % 4; + srcRoi.y = srcRoi.y - srcRoi.y % 2; srcRoi.height = srcRoi.height - srcRoi.height % 2; + targetRoi.x = targetRoi.x - targetRoi.x % 4; targetRoi.width = targetRoi.width - targetRoi.width % 4; + targetRoi.y = targetRoi.y - targetRoi.y % 2; targetRoi.height = targetRoi.height - targetRoi.height % 2; +} + CUDA_TEST_P(Scaling, Reader) { cv::cuda::setDevice(GET_PARAM(0).deviceID()); @@ -202,18 +218,15 @@ CUDA_TEST_P(Scaling, Reader) static_cast(frameOr.rows * srcRoiIn.height)); params.targetRoi = Rect(static_cast(params.targetSz.width * targetRoiIn.x), static_cast(params.targetSz.height * targetRoiIn.y), static_cast(params.targetSz.width * targetRoiIn.width), static_cast(params.targetSz.height * targetRoiIn.height)); + cv::Ptr reader = cv::cudacodec::createVideoReader(inputFile, {}, params); ASSERT_TRUE(reader->set(cudacodec::ColorFormat::GRAY)); GpuMat frame; ASSERT_TRUE(reader->nextFrame(frame)); const cudacodec::FormatInfo format = reader->format(); - Size targetSzOut; - targetSzOut.width = params.targetSz.width - params.targetSz.width % 2; targetSzOut.height = params.targetSz.height - params.targetSz.height % 2; - Rect srcRoiOut, targetRoiOut; - srcRoiOut.x = params.srcRoi.x - params.srcRoi.x % 4; srcRoiOut.width = params.srcRoi.width - params.srcRoi.width % 4; - srcRoiOut.y = params.srcRoi.y - params.srcRoi.y % 2; srcRoiOut.height = params.srcRoi.height - params.srcRoi.height % 2; - targetRoiOut.x = params.targetRoi.x - params.targetRoi.x % 4; targetRoiOut.width = params.targetRoi.width - params.targetRoi.width % 4; - targetRoiOut.y = params.targetRoi.y - params.targetRoi.y % 2; targetRoiOut.height = params.targetRoi.height - params.targetRoi.height % 2; + Size targetSzOut = params.targetSz; + Rect srcRoiOut = params.srcRoi, targetRoiOut = params.targetRoi; + ForceAlignment(srcRoiOut, targetRoiOut, targetSzOut); ASSERT_TRUE(format.valid && format.targetSz == targetSzOut && format.srcRoi == srcRoiOut && format.targetRoi == targetRoiOut); ASSERT_TRUE(frame.size() == targetSzOut); GpuMat frameGs; @@ -265,6 +278,104 @@ CUDA_TEST_P(Video, Reader) } } +CUDA_TEST_P(ReconfigureDecoderWithScaling, Reader) +{ + const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny_multi_res.h264"; + cv::cuda::setDevice(GET_PARAM(0).deviceID()); + const cudacodec::ResolutionChangeMode resChangeMode = GET_PARAM(1); + + GpuMat frameOr; + { + cv::Ptr readerGs = cv::cudacodec::createVideoReader(inputFile); + ASSERT_TRUE(readerGs->nextFrame(frameOr)); + } + + cv::cudacodec::VideoReaderInitParams params; + params.resChangeMode = resChangeMode; + const Size2f targetSzNew(0.8f, 0.9f); + const Rect2f srcRoiNew(0.25f, 0.25f, 0.5f, 0.5f); + const Rect2f targetRoiNew(0.2f, 0.3f, 0.6f, 0.7f); + params.targetSz = Size(static_cast(frameOr.cols * targetSzNew.width), static_cast(frameOr.rows * targetSzNew.height)); + params.srcRoi = Rect(static_cast(frameOr.cols * srcRoiNew.x), static_cast(frameOr.rows * srcRoiNew.y), static_cast(frameOr.cols * srcRoiNew.width), + static_cast(frameOr.rows * srcRoiNew.height)); + params.targetRoi = Rect(static_cast(params.targetSz.width * targetRoiNew.x), static_cast(params.targetSz.height * targetRoiNew.y), + static_cast(params.targetSz.width * targetRoiNew.width), static_cast(params.targetSz.height * targetRoiNew.height)); + + Size targetSzOut = params.targetSz; + Rect srcRoiOut = params.srcRoi, targetRoiOut = params.targetRoi; + ForceAlignment(srcRoiOut, targetRoiOut, targetSzOut); + GpuMat mask(targetSzOut, CV_8U, Scalar(255)); + mask(targetRoiOut).setTo(0); + + cv::Ptr reader = cv::cudacodec::createVideoReader(inputFile, {}, params); + reader->set(cudacodec::ColorFormat::GRAY); + cv::cudacodec::FormatInfo fmt; + cv::cuda::GpuMat frame; + int nFrames = 0; + Size initialSize; + while (reader->nextFrame(frame)) + { + ASSERT_TRUE(!frame.empty()); + if (nFrames++ == 0) + initialSize = frame.size(); + fmt = reader->format(); + ASSERT_TRUE(fmt.valid && (frame.size() == initialSize)); + ASSERT_TRUE(fmt.resChangeMode == resChangeMode); + ASSERT_TRUE((frame.size() == targetSzOut) && (fmt.targetSz == targetSzOut) && (fmt.srcRoi == srcRoiOut) && (fmt.targetRoi == targetRoiOut)); + // currently NV12 conversion is buggy and will be fixed in a future PR, therefore simply check black borders which are 16 as this source is using + // a reduced luma range + cuda::add(frame, -16, frame); + ASSERT_TRUE(!cuda::absSum(frame, mask)[0]); + } + ASSERT_TRUE(nFrames == 40); +} + + +CUDA_TEST_P(ReconfigureDecoder, Reader) +{ + const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny_multi_res.h264"; + cv::cuda::setDevice(GET_PARAM(0).deviceID()); + const cudacodec::ResolutionChangeMode resChangeMode = GET_PARAM(1); + const int minNumDecodeSurfaces = GET_PARAM(2); + cv::cudacodec::VideoReaderInitParams params; + params.resChangeMode = resChangeMode; + params.minNumDecodeSurfaces = minNumDecodeSurfaces; + cv::Ptr reader = cv::cudacodec::createVideoReader(inputFile, {}, params); + reader->set(cudacodec::ColorFormat::GRAY); + cv::cudacodec::FormatInfo fmt; + cv::cuda::GpuMat frame, mask; + int nFrames = 0; + Size initialSize; + while(reader->nextFrame(frame)) + { + ASSERT_TRUE(!frame.empty()); + if (nFrames++ == 0) + initialSize = frame.size(); + fmt = reader->format(); + ASSERT_TRUE(fmt.valid && (frame.size() == initialSize)); + ASSERT_TRUE(fmt.resChangeMode == resChangeMode); + ASSERT_TRUE(fmt.srcRoi.empty() && fmt.targetSz.empty()); + const bool resChanged = (initialSize.width != fmt.ulWidth) || (initialSize.height != fmt.ulHeight); + if (resChanged) { + if (resChangeMode == cudacodec::ResolutionChangeMode::Default) { + ASSERT_TRUE(fmt.targetRoi == Rect(0, 0, fmt.ulWidth, fmt.ulHeight)); + if (mask.empty()) { + mask = GpuMat(frame.size(), CV_8U, Scalar(255)); + mask(fmt.targetRoi).setTo(0); + } + // currently NV12 conversion is buggy and will be fixed in a future PR, therefore simply check black borders which are 16 as this source is + // using a reduced luma range + cuda::add(frame, -16, frame); + ASSERT_TRUE(!cuda::absSum(frame, mask)[0]); + } + else if (resChangeMode == cudacodec::ResolutionChangeMode::Qos) { + ASSERT_TRUE(fmt.targetRoi.empty()); + } + } + } + ASSERT_TRUE(nFrames == 40); +} + CUDA_TEST_P(VideoReadRaw, Reader) { cv::cuda::setDevice(GET_PARAM(0).deviceID()); @@ -672,6 +783,12 @@ INSTANTIATE_TEST_CASE_P(CUDA_Codec, Video, testing::Combine( ALL_DEVICES, testing::Values(VIDEO_SRC_R))); +#define RES_CHANGE_MODE cudacodec::ResolutionChangeMode::Default, cudacodec::ResolutionChangeMode::Qos +INSTANTIATE_TEST_CASE_P(CUDA_Codec, ReconfigureDecoderWithScaling, testing::Combine(ALL_DEVICES, testing::Values(RES_CHANGE_MODE))); + +#define N_DECODE_SURFACES 0, 10 +INSTANTIATE_TEST_CASE_P(CUDA_Codec, ReconfigureDecoder, testing::Combine( ALL_DEVICES, testing::Values(RES_CHANGE_MODE), testing::Values(N_DECODE_SURFACES))); + #define VIDEO_SRC_RW "highgui/video/big_buck_bunny.h264", "highgui/video/big_buck_bunny.h265" INSTANTIATE_TEST_CASE_P(CUDA_Codec, VideoReadRaw, testing::Combine( ALL_DEVICES,