cudacodec: add capacity to reconfigure decoder on resolution change

opencv · Mar 21, 2023 · f0fa5bb · f0fa5bb
1 parent ed1873b
commit f0fa5bb
Show file tree

Hide file tree

Showing 9 changed files with 315 additions and 76 deletions.
diff --git a/modules/cudacodec/include/opencv2/cudacodec.hpp b/modules/cudacodec/include/opencv2/cudacodec.hpp
@@ -305,16 +305,27 @@ enum DeinterlaceMode
     Adaptive = 2
 };
 
+/** @brief Output format for a decoded frame when the resolution of the source is reduced by the encoder. In all cases the size of the output frame remains the same.
+*
+* @param Default Use the approach adopted by cv::VideoCapture, i.e. maintain the same frame size by placing the smaller output in the top left corner.
+* @param Qos Maintain the same frame resolution by upscaling to the original resolution to seamlessly process streams produced by servers that are adhering to Quality of Service constraints.
+*/
+enum ResolutionChangeMode
+{
+    Default = 0,
+    Qos = 1
+};
+
 /** @brief Struct providing information about video file format. :
  */
 struct CV_EXPORTS_W_SIMPLE FormatInfo
 {
-    CV_WRAP FormatInfo() : nBitDepthMinus8(-1), ulWidth(0), ulHeight(0), width(0), height(0), ulMaxWidth(0), ulMaxHeight(0), valid(false),
+    CV_WRAP FormatInfo() : nBitDepthMinus8(-1), nBitDepthChromaMinus8(-1), ulWidth(0), ulHeight(0), width(0), height(0), ulMaxWidth(0), ulMaxHeight(0), valid(false),
         fps(0), ulNumDecodeSurfaces(0) {};
 
     CV_PROP_RW Codec codec;
     CV_PROP_RW ChromaFormat chromaFormat;
     CV_PROP_RW int nBitDepthMinus8;
+    CV_PROP_RW int nBitDepthChromaMinus8;
     CV_PROP_RW int ulWidth;//!< Coded sequence width in pixels.
     CV_PROP_RW int ulHeight;//!< Coded sequence height in pixels.
     CV_PROP_RW int width;//!< Width of the decoded frame returned by nextFrame(frame).
@@ -329,6 +340,7 @@ struct CV_EXPORTS_W_SIMPLE FormatInfo
     CV_PROP_RW cv::Size targetSz;//!< Post-processed size of the output frame.
     CV_PROP_RW cv::Rect srcRoi;//!< Region of interest decoded from video source.
     CV_PROP_RW cv::Rect targetRoi;//!< Region of interest in the output frame containing the decoded frame.
+    CV_PROP_RW ResolutionChangeMode resChangeMode;//!< Output format for a decoded frame when the resolution of the source is reduced by the encoder.
 };
 
 /** @brief cv::cudacodec::VideoReader generic properties identifier.
@@ -533,16 +545,18 @@ but it cannot go below the number determined by NVDEC.
 @param srcRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) decoded from video source, defaults to the full frame.
 @param targetRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) within the output frame to copy and resize the decoded frame to,
 defaults to the full frame.
+@param resChangeMode Output mode to use when the resolution of the source is changed by the encoder, ignored when targetRoi is specified.
 */
 struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
-    CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0) {};
+    CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0), resChangeMode(ResolutionChangeMode::Default) {};
     CV_PROP_RW bool udpSource;
     CV_PROP_RW bool allowFrameDrop;
     CV_PROP_RW int minNumDecodeSurfaces;
     CV_PROP_RW bool rawMode;
     CV_PROP_RW cv::Size targetSz;
     CV_PROP_RW cv::Rect srcRoi;
     CV_PROP_RW cv::Rect targetRoi;
+    CV_PROP_RW ResolutionChangeMode resChangeMode;
 };
 
 /** @brief Creates video reader.

diff --git a/modules/cudacodec/src/frame_queue.cpp b/modules/cudacodec/src/frame_queue.cpp
@@ -55,14 +55,29 @@ cv::cudacodec::detail::FrameQueue::~FrameQueue() {
 
 void cv::cudacodec::detail::FrameQueue::init(const int _maxSz) {
     AutoLock autoLock(mtx_);
-    if (isFrameInUse_)
-        return;
+    if (isFrameInUse_) return;
     maxSz = _maxSz;
     displayQueue_ = std::vector<CUVIDPARSERDISPINFO>(maxSz, CUVIDPARSERDISPINFO());
     isFrameInUse_ = new volatile int[maxSz];
     std::memset((void*)isFrameInUse_, 0, sizeof(*isFrameInUse_) * maxSz);
 }
 
+// Resize the frame queue to newSz entries, keeping the entries that are still queued and the
+// per-picture in-use flags that fit into the new size.
+// Does nothing when the size is unchanged and falls back to init() when the queue has not been
+// allocated yet.
+// Parameters:
+//      newSz - new size of the frame queue.
+// NOTE(review): if more frames are queued than fit into newSz the re-mapped entries overwrite
+// each other - callers presumably drain the queue before shrinking it; confirm.
+void cv::cudacodec::detail::FrameQueue::resize(const int newSz) {
+    if (newSz == maxSz) return;
+    // init() acquires mtx_ on its own, so it is called before the lock below is taken.
+    if (!isFrameInUse_) return init(newSz);
+    AutoLock autoLock(mtx_);
+    // Build the replacement storage first so that a failed allocation leaves the queue untouched.
+    std::vector<CUVIDPARSERDISPINFO> displayQueueNew(newSz, CUVIDPARSERDISPINFO());
+    volatile int* const isFrameInUseNew = new volatile int[newSz];
+    std::memset((void*)isFrameInUseNew, 0, sizeof(*isFrameInUseNew) * newSz);
+    std::memcpy((void*)isFrameInUseNew, (void*)isFrameInUse_, sizeof(*isFrameInUse_) * min(newSz, maxSz));
+    // Re-map the queued entries from their slot modulo the old size to the slot modulo the new size.
+    for (int i = readPosition_; i < readPosition_ + framesInQueue_; i++)
+        displayQueueNew.at(i % displayQueueNew.size()) = displayQueue_.at(i % displayQueue_.size());
+    // Swap instead of copying the whole vector; the old entries are released with displayQueueNew.
+    displayQueue_.swap(displayQueueNew);
+    delete[] isFrameInUse_;
+    isFrameInUse_ = isFrameInUseNew;
+    // The entries were re-mapped modulo the new size, so the read position has to be wrapped the
+    // same way, otherwise it can point past the end of a queue that has been shrunk.
+    if (newSz > 0)
+        readPosition_ = readPosition_ % newSz;
+    maxSz = newSz;
+}
+
 bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex, const bool allowFrameDrop)
 {
     while (isInUse(pictureIndex))
@@ -79,6 +94,15 @@ bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex
     return true;
 }
 
+// Poll until no frames are left in the queue, calling Thread::sleep(1) between checks.
+// Returns true as soon as framesInQueue_ is zero, false if isEndOfDecode() is reported while
+// frames are still queued.
+bool cv::cudacodec::detail::FrameQueue::waitUntilEmpty() {
+    for (;;) {
+        if (!framesInQueue_)
+            return true;
+        Thread::sleep(1);
+        if (isEndOfDecode())
+            return false;
+    }
+}
+
 void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams, const std::vector<RawPacket> rawPackets)
 {
     // Mark the frame as 'in-use' so we don't re-use it for decoding until it is no longer needed

diff --git a/modules/cudacodec/src/frame_queue.hpp b/modules/cudacodec/src/frame_queue.hpp
@@ -66,6 +66,12 @@ class FrameQueue
     ~FrameQueue();
     void init(const int _maxSz);
 
+    // Resize the current frame queue keeping any existing queued values - must only
+    // be called in the same thread as enqueue.
+    // Parameters:
+    //      newSz - new size of the frame queue.
+    void resize(const int newSz);
+
     void endDecode() { endOfDecode_ = true; }
     bool isEndOfDecode() const { return endOfDecode_ != 0;}
 
@@ -77,6 +83,8 @@ class FrameQueue
     // to ensure a frame is available.
     bool waitUntilFrameAvailable(int pictureIndex, const bool allowFrameDrop = false);
 
+    bool waitUntilEmpty();
+
     void enqueue(const CUVIDPARSERDISPINFO* picParams, const std::vector<RawPacket> rawPackets);
 
     // Deque the next frame.
@@ -97,6 +105,7 @@ class FrameQueue
     bool dequeueUntil(const int pictureIndex);
 
     void releaseFrame(const CUVIDPARSERDISPINFO& picParams) { isFrameInUse_[picParams.picture_index] = 0; }
+    int getMaxSz() const { return maxSz; } // Size the queue was last set to by init()/resize().
 private:
     bool isInUse(int pictureIndex) const { return isFrameInUse_[pictureIndex] != 0; }
 

diff --git a/modules/cudacodec/src/video_decoder.cpp b/modules/cudacodec/src/video_decoder.cpp
@@ -124,8 +124,8 @@ void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat)
     cuSafeCall(cuvidGetDecoderCaps(&decodeCaps));
     cuSafeCall(cuCtxPopCurrent(NULL));
     if (!(decodeCaps.bIsSupported && (decodeCaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12)))){
-        CV_Error(Error::StsUnsupportedFormat, "Video source is not supported by hardware video decoder");
         CV_LOG_ERROR(NULL, "Video source is not supported by hardware video decoder.");
+        CV_Error(Error::StsUnsupportedFormat, "Video source is not supported by hardware video decoder");
     }
     CV_Assert(videoFormat.ulWidth >= decodeCaps.nMinWidth &&
         videoFormat.ulHeight >= decodeCaps.nMinHeight &&
@@ -162,6 +162,61 @@ void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat)
     cuSafeCall(cuCtxPushCurrent(ctx_));
     cuSafeCall(cuvidCreateDecoder(&decoder_, &createInfo_));
     cuSafeCall(cuCtxPopCurrent(NULL));
+    inited_ = true;
+}
+
+// Reconfigure the existing decoder for a change in the coded resolution of the source.
+// A change of bit depth or chroma format, and (for every codec except VP9) a coded size above
+// ulMaxWidth/ulMaxHeight, is rejected with Error::StsUnsupportedFormat.
+// Parameters:
+//      videoFormat - format to switch to; only ulNumDecodeSurfaces, ulWidth, ulHeight and
+//                    targetRoi are copied into the stored format.
+// Returns 1 when the coded size is unchanged (the driver is not called), otherwise the number of
+// decode surfaces stored after the update.
+int cv::cudacodec::detail::VideoDecoder::reconfigure(const FormatInfo& videoFormat) {
+    if (videoFormat.nBitDepthMinus8 != videoFormat_.nBitDepthMinus8 || videoFormat.nBitDepthChromaMinus8 != videoFormat_.nBitDepthChromaMinus8) {
+        CV_LOG_ERROR(NULL, "Reconfigure Not supported for bit depth change");
+        CV_Error(Error::StsUnsupportedFormat, "Reconfigure Not supported for bit depth change");
+    }
+
+    if (videoFormat.chromaFormat != videoFormat_.chromaFormat) {
+        CV_LOG_ERROR(NULL, "Reconfigure Not supported for chroma format change");
+        CV_Error(Error::StsUnsupportedFormat, "Reconfigure Not supported for chroma format change");
+    }
+
+    // Has to be evaluated before the stored format is overwritten below.
+    const bool decodeResChange = !(videoFormat.ulWidth == videoFormat_.ulWidth && videoFormat.ulHeight == videoFormat_.ulHeight);
+
+    if ((videoFormat.ulWidth > videoFormat_.ulMaxWidth) || (videoFormat.ulHeight > videoFormat_.ulMaxHeight)) {
+        // For VP9, let the driver handle the change if new width/height > maxwidth/maxheight
+        if (videoFormat.codec != Codec::VP9) {
+            CV_LOG_ERROR(NULL, "Reconfigure Not supported when width/height > maxwidth/maxheight");
+            CV_Error(Error::StsUnsupportedFormat, "Reconfigure Not supported when width/height > maxwidth/maxheight");
+        }
+    }
+
+    {
+        AutoLock autoLock(mtx_);
+        videoFormat_.ulNumDecodeSurfaces = videoFormat.ulNumDecodeSurfaces;
+        videoFormat_.ulWidth = videoFormat.ulWidth;
+        videoFormat_.ulHeight = videoFormat.ulHeight;
+        videoFormat_.targetRoi = videoFormat.targetRoi;
+    }
+
+    if (!decodeResChange)
+        return 1;
+
+    // The display area and the output size are taken from the stored format; they are not
+    // updated from videoFormat above.
+    CUVIDRECONFIGUREDECODERINFO reconfigParams = { 0 };
+    reconfigParams.ulWidth = videoFormat_.ulWidth;
+    reconfigParams.ulHeight = videoFormat_.ulHeight;
+    reconfigParams.display_area.left = videoFormat_.displayArea.x;
+    reconfigParams.display_area.right = videoFormat_.displayArea.x + videoFormat_.displayArea.width;
+    reconfigParams.display_area.top = videoFormat_.displayArea.y;
+    reconfigParams.display_area.bottom = videoFormat_.displayArea.y + videoFormat_.displayArea.height;
+    reconfigParams.ulTargetWidth = videoFormat_.width;
+    reconfigParams.ulTargetHeight = videoFormat_.height;
+    reconfigParams.target_rect.left = videoFormat_.targetRoi.x;
+    reconfigParams.target_rect.right = videoFormat_.targetRoi.x + videoFormat_.targetRoi.width;
+    reconfigParams.target_rect.top = videoFormat_.targetRoi.y;
+    reconfigParams.target_rect.bottom = videoFormat_.targetRoi.y + videoFormat_.targetRoi.height;
+    reconfigParams.ulNumDecodeSurfaces = videoFormat_.ulNumDecodeSurfaces;
+
+    CV_LOG_INFO(NULL, "Reconfiguring Decoder");
+    cuSafeCall(cuCtxPushCurrent(ctx_));
+    const CUresult reconfigureStatus = cuvidReconfigureDecoder(decoder_, &reconfigParams);
+    // Pop the context before checking the result so that a failed reconfigure does not leave
+    // ctx_ pushed on this thread's context stack when cuSafeCall throws.
+    cuSafeCall(cuCtxPopCurrent(NULL));
+    cuSafeCall(reconfigureStatus);
+    return videoFormat_.ulNumDecodeSurfaces;
+}
 
 void cv::cudacodec::detail::VideoDecoder::release()

diff --git a/modules/cudacodec/src/video_decoder.hpp b/modules/cudacodec/src/video_decoder.hpp
@@ -49,11 +49,12 @@ namespace cv { namespace cudacodec { namespace detail {
 class VideoDecoder
 {
 public:
-    VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, cv::Size targetSz, cv::Rect srcRoi, cv::Rect targetRoi, CUcontext ctx, CUvideoctxlock lock) :
+    VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, cv::Size targetSz, cv::Rect srcRoi, cv::Rect targetRoi, const ResolutionChangeMode resChangeMode, CUcontext ctx, CUvideoctxlock lock) :
         ctx_(ctx), lock_(lock), decoder_(0)
     {
         videoFormat_.codec = codec;
         videoFormat_.ulNumDecodeSurfaces = minNumDecodeSurfaces;
+        videoFormat_.resChangeMode = resChangeMode;
         // alignment enforced by nvcuvid, likely due to chroma subsampling
         videoFormat_.targetSz.width = targetSz.width - targetSz.width % 2; videoFormat_.targetSz.height = targetSz.height - targetSz.height % 2;
         videoFormat_.srcRoi.x = srcRoi.x - srcRoi.x % 4; videoFormat_.srcRoi.width = srcRoi.width - srcRoi.width % 4;
@@ -68,14 +69,16 @@ class VideoDecoder
     }
 
     void create(const FormatInfo& videoFormat);
+    int reconfigure(const FormatInfo& videoFormat);
     void release();
 
-    // Get the code-type currently used.
+    // Get the codec-type currently used.
     cudaVideoCodec codec() const { return static_cast<cudaVideoCodec>(videoFormat_.codec); }
     int nDecodeSurfaces() const { return videoFormat_.ulNumDecodeSurfaces; }
     cv::Size getTargetSz() const { return videoFormat_.targetSz; }
     cv::Rect getSrcRoi() const { return videoFormat_.srcRoi; }
     cv::Rect getTargetRoi() const { return videoFormat_.targetRoi; }
+    ResolutionChangeMode getResChangeMode() const { return videoFormat_.resChangeMode; }
 
     unsigned long frameWidth() const { return videoFormat_.ulWidth; }
     unsigned long frameHeight() const { return videoFormat_.ulHeight; }
@@ -84,6 +87,8 @@ class VideoDecoder
     unsigned long targetWidth() { return videoFormat_.width; }
     unsigned long targetHeight() { return videoFormat_.height; }
 
+    bool inited() const { return inited_; } // True once create() has built the decoder.
+
     cudaVideoChromaFormat chromaFormat() const { return static_cast<cudaVideoChromaFormat>(videoFormat_.chromaFormat); }
     int nBitDepthMinus8() const { return videoFormat_.nBitDepthMinus8; }
 
@@ -114,6 +119,7 @@ class VideoDecoder
     CUvideodecoder        decoder_ = 0;
     FormatInfo videoFormat_ = {};
     Mutex mtx_;
+    bool inited_ = false;
 };
 
 }}}