diff --git a/modules/cudacodec/CMakeLists.txt b/modules/cudacodec/CMakeLists.txt index 9281024e8b4..6ff9f1ae9d7 100644 --- a/modules/cudacodec/CMakeLists.txt +++ b/modules/cudacodec/CMakeLists.txt @@ -6,24 +6,33 @@ set(the_description "CUDA-accelerated Video Encoding/Decoding") ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wshadow) -ocv_add_module(cudacodec opencv_core opencv_videoio opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudev WRAP python) +set(required_dependencies opencv_core opencv_videoio opencv_cudaarithm opencv_cudawarping) +if(HAVE_NVCUVENC) + list(APPEND required_dependencies opencv_cudaimgproc) +endif() + +ocv_add_module(cudacodec ${required_dependencies} OPTIONAL opencv_cudev WRAP python) ocv_module_include_directories() ocv_glob_module_sources() set(extra_libs "") -if(HAVE_NVCUVID) - list(APPEND extra_libs ${CUDA_CUDA_LIBRARY} ${CUDA_nvcuvid_LIBRARY}) -endif() - -if(HAVE_NVCUVENC) - if(WIN32) - list(APPEND extra_libs ${CUDA_nvcuvenc_LIBRARY}) +if(HAVE_NVCUVID OR HAVE_NVCUVENC) + list(APPEND extra_libs ${CUDA_CUDA_LIBRARY}) + if(HAVE_NVCUVID) + list(APPEND extra_libs ${CUDA_nvcuvid_LIBRARY}) + endif() + if(HAVE_NVCUVENC) + if(WIN32) + list(APPEND extra_libs ${CUDA_nvencodeapi_LIBRARY}) + else() + list(APPEND extra_libs ${CUDA_nvidia-encode_LIBRARY}) + endif() endif() endif() ocv_create_module(${extra_libs}) ocv_add_accuracy_tests() -ocv_add_perf_tests() +ocv_add_perf_tests() \ No newline at end of file diff --git a/modules/cudacodec/include/opencv2/cudacodec.hpp b/modules/cudacodec/include/opencv2/cudacodec.hpp index 5aacba0ab92..7a772516814 100644 --- a/modules/cudacodec/include/opencv2/cudacodec.hpp +++ b/modules/cudacodec/include/opencv2/cudacodec.hpp @@ -66,114 +66,162 @@ using namespace cuda; // Stream ////////////////////////////////// Video Encoding ////////////////////////////////// -// Works only under Windows. -// Supports only H264 video codec and AVI files. - -enum SurfaceFormat -{ - SF_UYVY = 0, - SF_YUY2, - SF_YV12, - SF_NV12, - SF_IYUV, - SF_BGR, - SF_GRAY = SF_BGR -}; - -/** @brief Different parameters for CUDA video encoder. +/** @brief Video codecs supported by cudacodec::VideoReader and cudacodec::VideoWriter. +@note + - Support will depend on your hardware, refer to the Nvidia Video Codec SDK Video Encode and Decode GPU Support Matrix for details. */ -struct CV_EXPORTS_W EncoderParams +enum Codec { - int P_Interval; //!< NVVE_P_INTERVAL, - int IDR_Period; //!< NVVE_IDR_PERIOD, - int DynamicGOP; //!< NVVE_DYNAMIC_GOP, - int RCType; //!< NVVE_RC_TYPE, - int AvgBitrate; //!< NVVE_AVG_BITRATE, - int PeakBitrate; //!< NVVE_PEAK_BITRATE, - int QP_Level_Intra; //!< NVVE_QP_LEVEL_INTRA, - int QP_Level_InterP; //!< NVVE_QP_LEVEL_INTER_P, - int QP_Level_InterB; //!< NVVE_QP_LEVEL_INTER_B, - int DeblockMode; //!< NVVE_DEBLOCK_MODE, - int ProfileLevel; //!< NVVE_PROFILE_LEVEL, - int ForceIntra; //!< NVVE_FORCE_INTRA, - int ForceIDR; //!< NVVE_FORCE_IDR, - int ClearStat; //!< NVVE_CLEAR_STAT, - int DIMode; //!< NVVE_SET_DEINTERLACE, - int Presets; //!< NVVE_PRESETS, - int DisableCabac; //!< NVVE_DISABLE_CABAC, - int NaluFramingType; //!< NVVE_CONFIGURE_NALU_FRAMING_TYPE - int DisableSPSPPS; //!< NVVE_DISABLE_SPS_PPS - - EncoderParams(); - /** @brief Constructors. - - @param configFile Config file name. - - Creates default parameters or reads parameters from config file. 
- */ - explicit EncoderParams(const String& configFile); + MPEG1 = 0, + MPEG2, + MPEG4, + VC1, + H264, + JPEG, + H264_SVC, + H264_MVC, + HEVC, + VP8, + VP9, + AV1, + NumCodecs, - /** @brief Reads parameters from config file. + Uncompressed_YUV420 = (('I' << 24) | ('Y' << 16) | ('U' << 8) | ('V')), //!< Y,U,V (4:2:0) + Uncompressed_YV12 = (('Y' << 24) | ('V' << 16) | ('1' << 8) | ('2')), //!< Y,V,U (4:2:0) + Uncompressed_NV12 = (('N' << 24) | ('V' << 16) | ('1' << 8) | ('2')), //!< Y,UV (4:2:0) + Uncompressed_YUYV = (('Y' << 24) | ('U' << 16) | ('Y' << 8) | ('V')), //!< YUYV/YUY2 (4:2:2) + Uncompressed_UYVY = (('U' << 24) | ('Y' << 16) | ('V' << 8) | ('Y')) //!< UYVY (4:2:2) +}; - @param configFile Config file name. - */ - void load(const String& configFile); - /** @brief Saves parameters to config file. +/** @brief ColorFormat for the frame returned by VideoReader::nextFrame() and VideoReader::retrieve() or used to initialize a VideoWriter. +*/ +enum class ColorFormat { + UNDEFINED = 0, + BGRA = 1, //!< OpenCV color format, can be used with both VideoReader and VideoWriter. + BGR = 2, //!< OpenCV color format, can be used with both VideoReader and VideoWriter. + GRAY = 3, //!< OpenCV color format, can be used with both VideoReader and VideoWriter. + NV_NV12 = 4, //!< Nvidia color format - equivalent to YUV - Semi-Planar YUV [Y plane followed by interleaved UV plane], can be used with both VideoReader and VideoWriter. + + RGB = 5, //!< OpenCV color format, can only be used with VideoWriter. + RGBA = 6, //!< OpenCV color format, can only be used with VideoWriter. + NV_YV12 = 8, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by V and U planes], use with VideoReader, can only be used with VideoWriter. + NV_IYUV = 9, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by U and V planes], use with VideoReader, can only be used with VideoWriter. + NV_YUV444 = 10, //!< Nvidia Buffer Format - Planar YUV [Y plane followed by U and V planes], use with VideoReader, can only be used with VideoWriter. + NV_AYUV = 11, //!< Nvidia Buffer Format - 8 bit Packed A8Y8U8V8. This is a word-ordered format where a pixel is represented by a 32-bit word with V in the lowest 8 bits, U in the next 8 bits, Y in the 8 bits after that and A in the highest 8 bits, can only be used with VideoWriter. +#ifndef CV_DOXYGEN + PROP_NOT_SUPPORTED +#endif +}; - @param configFile Config file name. - */ - void save(const String& configFile) const; +/** @brief Rate Control Modes. +*/ +enum EncodeParamsRcMode { + ENC_PARAMS_RC_CONSTQP = 0x0, //!< Constant QP mode. + ENC_PARAMS_RC_VBR = 0x1, //!< Variable bitrate mode. + ENC_PARAMS_RC_CBR = 0x2 //!< Constant bitrate mode. }; -/** @brief Callbacks for CUDA video encoder. - */ -class CV_EXPORTS_W EncoderCallBack +/** @brief Multi Pass Encoding. +*/ +enum EncodeMultiPass { -public: - enum PicType - { - IFRAME = 1, - PFRAME = 2, - BFRAME = 3 - }; + ENC_MULTI_PASS_DISABLED = 0x0, //!< Single Pass. + ENC_TWO_PASS_QUARTER_RESOLUTION = 0x1, //!< Two Pass encoding is enabled where first Pass is quarter resolution. + ENC_TWO_PASS_FULL_RESOLUTION = 0x2, //!< Two Pass encoding is enabled where first Pass is full resolution. +}; - virtual ~EncoderCallBack() {} - /** @brief Callback function to signal the start of bitstream that is to be encoded. +/** @brief Supported Encoder Profiles. 
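For reference, the Uncompressed_* entries in the Codec enum above are plain FourCC codes packed with the first character in the most significant byte. A tiny stand-alone sketch (not part of this patch) of the same packing arithmetic:

    #include <cstdio>

    // Pack four characters the same way as the Uncompressed_* Codec values above:
    // first character in the most significant byte.
    constexpr int fourcc(char a, char b, char c, char d)
    {
        return (a << 24) | (b << 16) | (c << 8) | d;
    }

    int main()
    {
        // 0x49595556 corresponds to Codec::Uncompressed_YUV420 ('I','Y','U','V')
        std::printf("IYUV = 0x%08X\n", static_cast<unsigned>(fourcc('I', 'Y', 'U', 'V')));
        // 0x4E563132 corresponds to Codec::Uncompressed_NV12 ('N','V','1','2')
        std::printf("NV12 = 0x%08X\n", static_cast<unsigned>(fourcc('N', 'V', '1', '2')));
        return 0;
    }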
+*/ +enum EncodeProfile { + ENC_CODEC_PROFILE_AUTOSELECT = 0, + ENC_H264_PROFILE_BASELINE = 1, + ENC_H264_PROFILE_MAIN = 2, + ENC_H264_PROFILE_HIGH = 3, + ENC_H264_PROFILE_HIGH_444 = 4, + ENC_H264_PROFILE_STEREO = 5, + ENC_H264_PROFILE_PROGRESSIVE_HIGH = 6, + ENC_H264_PROFILE_CONSTRAINED_HIGH = 7, + ENC_HEVC_PROFILE_MAIN = 8, + ENC_HEVC_PROFILE_MAIN10 = 9, + ENC_HEVC_PROFILE_FREXT = 10 +}; - Callback must allocate buffer for CUDA encoder and return pointer to it and it's size. - */ - virtual uchar* acquireBitStream(int* bufferSize) = 0; +/** @brief Nvidia Encoding Presets. Performance degrades and quality improves as we move from P1 to P7. +*/ +enum EncodePreset { + ENC_PRESET_P1 = 1, + ENC_PRESET_P2 = 2, + ENC_PRESET_P3 = 3, + ENC_PRESET_P4 = 4, + ENC_PRESET_P5 = 5, + ENC_PRESET_P6 = 6, + ENC_PRESET_P7 = 7 +}; - /** @brief Callback function to signal that the encoded bitstream is ready to be written to file. - */ - virtual void releaseBitStream(unsigned char* data, int size) = 0; +/** @brief Tuning information. +*/ +enum EncodeTuningInfo { + ENC_TUNING_INFO_UNDEFINED = 0, //!< Undefined tuningInfo. Invalid value for encoding. + ENC_TUNING_INFO_HIGH_QUALITY = 1, //!< Tune presets for latency tolerant encoding. + ENC_TUNING_INFO_LOW_LATENCY = 2, //!< Tune presets for low latency streaming. + ENC_TUNING_INFO_ULTRA_LOW_LATENCY = 3, //!< Tune presets for ultra low latency streaming. + ENC_TUNING_INFO_LOSSLESS = 4, //!< Tune presets for lossless encoding. + ENC_TUNING_INFO_COUNT +}; - /** @brief Callback function to signal that the encoding operation on the frame has started. +/** Quantization Parameter for each type of frame when using ENC_PARAMS_RC_MODE::ENC_PARAMS_RC_CONSTQP. +*/ +struct CV_EXPORTS_W_SIMPLE EncodeQp +{ + CV_PROP_RW uint32_t qpInterP; //!< Specifies QP value for P-frame. + CV_PROP_RW uint32_t qpInterB; //!< Specifies QP value for B-frame. + CV_PROP_RW uint32_t qpIntra; //!< Specifies QP value for Intra Frame. +}; - @param frameNumber - @param picType Specify frame type (I-Frame, P-Frame or B-Frame). - */ - CV_WRAP virtual void onBeginFrame(int frameNumber, EncoderCallBack::PicType picType) = 0; +/** @brief Different parameters for CUDA video encoder. +*/ +struct CV_EXPORTS_W_SIMPLE EncoderParams +{ +public: + CV_WRAP EncoderParams(); + CV_PROP_RW EncodePreset nvPreset; + CV_PROP_RW EncodeTuningInfo tuningInfo; + CV_PROP_RW EncodeProfile encodingProfile; + CV_PROP_RW EncodeParamsRcMode rateControlMode; + CV_PROP_RW EncodeMultiPass multiPassEncoding; + CV_PROP_RW EncodeQp constQp; //!< QP's for ENC_PARAMS_RC_CONSTQP. + CV_PROP_RW int averageBitRate; //!< target bitrate for ENC_PARAMS_RC_VBR and ENC_PARAMS_RC_CBR. + CV_PROP_RW int maxBitRate; //!< upper bound on bitrate for ENC_PARAMS_RC_VBR and ENC_PARAMS_RC_CONSTQP. + CV_PROP_RW uint8_t targetQuality; //!< value 0 - 51 where video quality decreases as targetQuality increases, used with ENC_PARAMS_RC_VBR. + CV_PROP_RW int gopLength; +}; +CV_EXPORTS bool operator==(const EncoderParams& lhs, const EncoderParams& rhs); - /** @brief Callback function signals that the encoding operation on the frame has finished. +/** @brief Interface for encoder callbacks. - @param frameNumber - @param picType Specify frame type (I-Frame, P-Frame or B-Frame). - */ - CV_WRAP virtual void onEndFrame(int frameNumber, EncoderCallBack::PicType picType) = 0; -}; +User can implement own multiplexing by implementing this interface. 
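As a usage sketch for the EncoderParams struct above (the helper names makeConstQpParams/makeVbrParams and all field values are arbitrary illustrations, not part of this patch or recommendations):

    #include <opencv2/cudacodec.hpp>

    // Illustrative sketch only: constant-QP rate control.
    cv::cudacodec::EncoderParams makeConstQpParams()
    {
        cv::cudacodec::EncoderParams params;                 // other fields keep the defaults from EncoderParams()
        params.rateControlMode = cv::cudacodec::ENC_PARAMS_RC_CONSTQP;
        params.constQp.qpIntra  = 25;                        // QP for intra frames
        params.constQp.qpInterP = 28;                        // QP for P-frames
        params.constQp.qpInterB = 31;                        // QP for B-frames
        params.gopLength = 30;                               // one intra frame every 30 frames
        return params;
    }

    // Capped variable bitrate instead of constant QP.
    cv::cudacodec::EncoderParams makeVbrParams()
    {
        cv::cudacodec::EncoderParams params;
        params.rateControlMode = cv::cudacodec::ENC_PARAMS_RC_VBR;
        params.averageBitRate  = 4000000;                    // 4 Mbit/s target
        params.maxBitRate      = 6000000;                    // 6 Mbit/s upper bound
        return params;
    }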
+*/ class CV_EXPORTS_W EncoderCallback { public: + /** @brief Callback function to signal that the encoded bitstream for one or more frames is ready. -/** @brief Video writer interface. + @param vPacket The raw bitstream for one or more frames. + */ + virtual void onEncoded(std::vector<std::vector<uint8_t>> vPacket) = 0; -The implementation uses H264 video codec. + /** @brief Callback function to signal that the encoding has finished. + * */ + virtual void onEncodingFinished() = 0; -@note Currently only Windows platform is supported. + virtual ~EncoderCallback() {} +}; +/** @brief Video writer interface. @note - An example on how to use the videoWriter class can be found at opencv_source_code/samples/gpu/video_writer.cpp - */ +*/ class CV_EXPORTS_W VideoWriter { public: @@ -181,90 +229,51 @@ class CV_EXPORTS_W VideoWriter /** @brief Writes the next video frame. - @param frame The written frame. - @param lastFrame Indicates that it is end of stream. The parameter can be ignored. + @param frame The frame to be written. - The method write the specified image to video file. The image must have the same size and the same + The method encodes the specified image to a video stream. The image must have the same size and the same surface format as has been specified when opening the video writer. - */ - CV_WRAP virtual void write(InputArray frame, bool lastFrame = false) = 0; + */ + CV_WRAP virtual void write(InputArray frame) = 0; + /** @brief Retrieve the encoding parameters. + */ CV_WRAP virtual EncoderParams getEncoderParams() const = 0; + + /** @brief Waits until the encoding process has finished before calling EncoderCallback::onEncodingFinished(). + */ + CV_WRAP virtual void release() = 0; }; /** @brief Creates video writer. -@param fileName Name of the output video file. Only AVI file format is supported. +@param fileName Name of the output video file. Only raw h264 or hevc files are supported. @param frameSize Size of the input video frames. +@param codec Codec. @param fps Framerate of the created video stream. -@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 , -SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before -encoding, frames with other formats will be used as is. - -The constructors initialize video writer. FFMPEG is used to write videos. User can implement own -multiplexing with cudacodec::EncoderCallBack . - */ -CV_EXPORTS_W Ptr<VideoWriter> createVideoWriter(const String& fileName, Size frameSize, double fps, SurfaceFormat format = SF_BGR); -/** @overload -@param fileName Name of the output video file. Only AVI file format is supported. -@param frameSize Size of the input video frames. -@param fps Framerate of the created video stream. -@param params Encoder parameters. See cudacodec::EncoderParams . -@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 , -SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before -encoding, frames with other formats will be used as is. +@param colorFormat OpenCV color format of the frames to be encoded. +@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallback. Required for working with the encoded video stream. +@param stream Stream for frame pre-processing.
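A minimal usage sketch for the EncoderCallback interface and the writer factory documented above. RawFileWriter, encodeExample, the output file names and the 1920x1080/25 fps settings are illustrative assumptions, not part of this patch:

    #include <opencv2/cudacodec.hpp>
    #include <cstdint>
    #include <fstream>
    #include <string>
    #include <vector>

    // Illustrative sketch: append every encoded packet to a raw bitstream file.
    class RawFileWriter : public cv::cudacodec::EncoderCallback
    {
    public:
        explicit RawFileWriter(const std::string& path) : file(path, std::ios::binary) {}

        void onEncoded(std::vector<std::vector<uint8_t>> vPacket) override
        {
            for (const auto& packet : vPacket)
                file.write(reinterpret_cast<const char*>(packet.data()),
                           static_cast<std::streamsize>(packet.size()));
        }

        void onEncodingFinished() override { file.close(); }

    private:
        std::ofstream file;
    };

    // Encode a sequence of BGR GpuMats through the callback above.
    void encodeExample(const std::vector<cv::cuda::GpuMat>& bgrFrames)
    {
        cv::Ptr<cv::cudacodec::EncoderCallback> cb = cv::makePtr<RawFileWriter>("out.h264");
        cv::Ptr<cv::cudacodec::VideoWriter> writer = cv::cudacodec::createVideoWriter(
            "out_direct.h264", cv::Size(1920, 1080), cv::cudacodec::Codec::H264, 25.0,
            cv::cudacodec::ColorFormat::BGR, cb);   // fileName may be unused when a custom callback is supplied (assumption)
        for (const auto& frame : bgrFrames)
            writer->write(frame);
        writer->release();   // flushes the encoder and triggers onEncodingFinished()
    }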
*/ -CV_EXPORTS_W Ptr createVideoWriter(const String& fileName, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); +CV_EXPORTS_W Ptr createVideoWriter(const String& fileName, const Size frameSize, const Codec codec = Codec::H264, const double fps = 25.0, + const ColorFormat colorFormat = ColorFormat::BGR, Ptr encoderCallback = 0, const Stream& stream = Stream::Null()); -/** @overload -@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallBack . Use it if you -want to work with raw video stream. -@param frameSize Size of the input video frames. -@param fps Framerate of the created video stream. -@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 , -SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before -encoding, frames with other formats will be used as is. -*/ -CV_EXPORTS_W Ptr createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, SurfaceFormat format = SF_BGR); -/** @overload -@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallBack . Use it if you -want to work with raw video stream. +/** @brief Creates video writer. + +@param fileName Name of the output video file. Only raw h264 or hevc files are supported. @param frameSize Size of the input video frames. +@param codec Codec. @param fps Framerate of the created video stream. -@param params Encoder parameters. See cudacodec::EncoderParams. -@param format Surface format of input frames ( SF_UYVY , SF_YUY2 , SF_YV12 , SF_NV12 , -SF_IYUV , SF_BGR or SF_GRAY). BGR or gray frames will be converted to YV12 format before -encoding, frames with other formats will be used as is. +@param colorFormat OpenCv color format of the frames to be encoded. +@param params Additional encoding parameters. +@param encoderCallback Callbacks for video encoder. See cudacodec::EncoderCallback. Required for working with the encoded video stream. +@param stream Stream for frame pre-processing. */ -CV_EXPORTS_W Ptr createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); +CV_EXPORTS_W Ptr createVideoWriter(const String& fileName, const Size frameSize, const Codec codec, const double fps, const ColorFormat colorFormat, + const EncoderParams& params, Ptr encoderCallback = 0, const Stream& stream = Stream::Null()); ////////////////////////////////// Video Decoding ////////////////////////////////////////// -/** @brief Video codecs supported by cudacodec::VideoReader . - */ -enum Codec -{ - MPEG1 = 0, - MPEG2, - MPEG4, - VC1, - H264, - JPEG, - H264_SVC, - H264_MVC, - HEVC, - VP8, - VP9, - AV1, - NumCodecs, - - Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), //!< Y,U,V (4:2:0) - Uncompressed_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), //!< Y,V,U (4:2:0) - Uncompressed_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), //!< Y,UV (4:2:0) - Uncompressed_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), //!< YUYV/YUY2 (4:2:2) - Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) //!< UYVY (4:2:2) -}; - /** @brief Chroma formats supported by cudacodec::VideoReader. */ enum ChromaFormat @@ -331,18 +340,6 @@ enum class VideoReaderProps { #endif }; -/** @brief ColorFormat for the frame returned by nextFrame()/retrieve(). -*/ -enum class ColorFormat { - BGRA = 1, - BGR = 2, - GRAY = 3, - YUV = 4, -#ifndef CV_DOXYGEN - PROP_NOT_SUPPORTED -#endif -}; - /** @brief Video reader interface. 
@note @@ -438,8 +435,9 @@ class CV_EXPORTS_W VideoReader /** @brief Set the desired ColorFormat for the frame returned by nextFrame()/retrieve(). @param colorFormat Value of the ColorFormat. + @return `true` unless the colorFormat is not supported. */ - CV_WRAP virtual void set(const ColorFormat colorFormat) = 0; + CV_WRAP virtual bool set(const ColorFormat colorFormat) = 0; /** @brief Returns the specified VideoReader property diff --git a/modules/cudacodec/misc/python/pyopencv_cudacodec.hpp b/modules/cudacodec/misc/python/pyopencv_cudacodec.hpp deleted file mode 100644 index 15fd43de427..00000000000 --- a/modules/cudacodec/misc/python/pyopencv_cudacodec.hpp +++ /dev/null @@ -1,10 +0,0 @@ -#ifdef HAVE_OPENCV_CUDACODEC - -#include "opencv2/cudacodec.hpp" - -typedef cudacodec::EncoderCallBack::PicType EncoderCallBack_PicType; - -CV_PY_TO_CLASS(cudacodec::EncoderParams); -CV_PY_FROM_CLASS(cudacodec::EncoderParams); - -#endif diff --git a/modules/cudacodec/misc/python/test/test_cudacodec.py b/modules/cudacodec/misc/python/test/test_cudacodec.py index dc9f7a40aae..3f41c3bbede 100644 --- a/modules/cudacodec/misc/python/test/test_cudacodec.py +++ b/modules/cudacodec/misc/python/test/test_cudacodec.py @@ -2,7 +2,7 @@ import os import cv2 as cv import numpy as np - +import tempfile from tests_common import NewOpenCVTests, unittest class cudacodec_test(NewOpenCVTests): @@ -79,12 +79,28 @@ def test_writer_existence(self): #Test at least the existence of wrapped functions for now try: - _writer = cv.cudacodec.createVideoWriter("tmp", (128, 128), 30) + fd, fname = tempfile.mkstemp(suffix=".h264") + os.close(fd) + encoder_params_in = cv.cudacodec.EncoderParams() + encoder_params_in.gopLength = 10 + stream = cv.cuda.Stream() + sz = (1920,1080) + writer = cv.cudacodec.createVideoWriter(fname, sz, cv.cudacodec.H264, 30, cv.cudacodec.ColorFormat_BGR, + encoder_params_in, stream=stream) + blankFrameIn = cv.cuda.GpuMat(sz,cv.CV_8UC3) + writer.write(blankFrameIn) + writer.release() + encoder_params_out = writer.getEncoderParams() + self.assert_true(encoder_params_in.gopLength == encoder_params_out.gopLength) + cap = cv.VideoCapture(fname,cv.CAP_FFMPEG) + self.assert_true(cap.isOpened()) + ret, blankFrameOut = cap.read() + self.assert_true(ret and blankFrameOut.shape == blankFrameIn.download().shape) except cv.error as e: self.assertEqual(e.code, cv.Error.StsNotImplemented) - self.skipTest("NVCUVENC is not installed") + self.skipTest("Either NVCUVENC or a GPU hardware encoder is missing or the encoding profile is not supported.") - self.assertTrue(True) #It is sufficient that no exceptions have been there + os.remove(fname) if __name__ == '__main__': NewOpenCVTests.bootstrap() \ No newline at end of file diff --git a/modules/cudacodec/perf/perf_video.cpp b/modules/cudacodec/perf/perf_video.cpp index af7b2f67c90..bb4e9a4a775 100644 --- a/modules/cudacodec/perf/perf_video.cpp +++ b/modules/cudacodec/perf/perf_video.cpp @@ -45,7 +45,7 @@ namespace opencv_test { namespace { -#if defined(HAVE_NVCUVID) +#if defined(HAVE_NVCUVID) || defined(HAVE_NVCUVENC) #if defined(HAVE_FFMPEG_WRAPPER) // should this be set in preprocessor or in cvconfig.h #define VIDEO_SRC Values("cv/video/768x576.avi", "cv/video/1920x1080.avi") @@ -54,6 +54,8 @@ namespace opencv_test { namespace { #define VIDEO_SRC Values( "cv/video/1920x1080.avi") #endif +#if defined(HAVE_NVCUVID) + DEF_PARAM_TEST_1(FileName, string); ////////////////////////////////////////////////////// @@ -93,63 +95,97 @@ PERF_TEST_P(FileName, VideoReader, VIDEO_SRC) 
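Because VideoReader::set() now returns bool rather than void (see the declaration change above), callers should check the result. A short sketch, where openBgrReader and srcPath are hypothetical:

    #include <opencv2/cudacodec.hpp>
    #include <iostream>
    #include <string>

    // Sketch: request BGR output and report when the format is not supported.
    void openBgrReader(const std::string& srcPath)
    {
        cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(srcPath);
        if (!reader->set(cv::cudacodec::ColorFormat::BGR))
            std::cerr << "BGR output is not supported; the reader keeps its current ColorFormat\n";
    }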
////////////////////////////////////////////////////// // VideoWriter -#if defined(HAVE_NVCUVID) && defined(_WIN32) +#if defined(HAVE_NVCUVENC) -PERF_TEST_P(FileName, VideoWriter, VIDEO_SRC) -{ - declare.time(30); - - const string inputFile = perf::TestBase::getDataPath(GetParam()); - const string outputFile = cv::tempfile(".avi"); +DEF_PARAM_TEST(WriteToFile, string, cv::cudacodec::ColorFormat, cv::cudacodec::Codec); - const double FPS = 25.0; +#define COLOR_FORMAT Values(cv::cudacodec::ColorFormat::BGR, cv::cudacodec::ColorFormat::RGB, cv::cudacodec::ColorFormat::BGRA, \ +cv::cudacodec::ColorFormat::RGBA, cv::cudacodec::ColorFormat::GRAY) +#define CODEC Values(cv::cudacodec::Codec::H264, cv::cudacodec::Codec::HEVC) +PERF_TEST_P(WriteToFile, VideoWriter, Combine(VIDEO_SRC, COLOR_FORMAT, CODEC)) +{ + declare.time(30); + const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0)); + const cv::cudacodec::ColorFormat surfaceFormat = GET_PARAM(1); + const cudacodec::Codec codec = GET_PARAM(2); + const double fps = 25; + const int nFrames = 20; cv::VideoCapture reader(inputFile); - ASSERT_TRUE( reader.isOpened() ); - - cv::Mat frame; - - if (PERF_RUN_CUDA()) - { - cv::Ptr d_writer; - - cv::cuda::GpuMat d_frame; - - for (int i = 0; i < 10; ++i) - { - reader >> frame; - ASSERT_FALSE(frame.empty()); - - d_frame.upload(frame); - - if (d_writer.empty()) - d_writer = cv::cudacodec::createVideoWriter(outputFile, frame.size(), FPS); - - startTimer(); next(); - d_writer->write(d_frame); + ASSERT_TRUE(reader.isOpened()); + Mat frameBgr; + if (PERF_RUN_CUDA()) { + const std::string ext = codec == cudacodec::Codec::H264 ? ".h264" : ".hevc"; + const string outputFile = cv::tempfile(ext.c_str()); + std::vector frames; + cv::Mat frameNewSf; + cuda::Stream stream; + ColorConversionCodes conversionCode = COLOR_COLORCVT_MAX; + switch (surfaceFormat) { + case cudacodec::ColorFormat::RGB: + conversionCode = COLOR_BGR2RGB; + break; + case cudacodec::ColorFormat::BGRA: + conversionCode = COLOR_BGR2BGRA; + break; + case cudacodec::ColorFormat::RGBA: + conversionCode = COLOR_BGR2RGBA; + break; + case cudacodec::ColorFormat::GRAY: + conversionCode = COLOR_BGR2GRAY; + default: + break; + } + for (int i = 0; i < nFrames; i++) { + reader >> frameBgr; + ASSERT_FALSE(frameBgr.empty()); + if (conversionCode == COLOR_COLORCVT_MAX) + frameNewSf = frameBgr; + else + cv::cvtColor(frameBgr, frameNewSf, conversionCode); + GpuMat frame; frame.upload(frameNewSf, stream); + frames.push_back(frame); + } + stream.waitForCompletion(); + cv::Ptr d_writer = cv::cudacodec::createVideoWriter(outputFile, frameBgr.size(), codec, fps, surfaceFormat, 0, stream); + for (int i = 0; i < nFrames - 1; ++i) { + startTimer(); + d_writer->write(frames[i]); stopTimer(); } + startTimer(); + d_writer->write(frames[nFrames - 1]); + d_writer->release(); + stopTimer(); + + ASSERT_EQ(0, remove(outputFile.c_str())); } - else - { + else { + if (surfaceFormat != cv::cudacodec::ColorFormat::BGR || codec != cv::cudacodec::Codec::H264) + throw PerfSkipTestException(); cv::VideoWriter writer; - - for (int i = 0; i < 10; ++i) - { - reader >> frame; - ASSERT_FALSE(frame.empty()); - + const string outputFile = cv::tempfile(".avi"); + for (int i = 0; i < nFrames-1; ++i) { + reader >> frameBgr; + ASSERT_FALSE(frameBgr.empty()); if (!writer.isOpened()) - writer.open(outputFile, VideoWriter::fourcc('X', 'V', 'I', 'D'), FPS, frame.size()); - - startTimer(); next(); - writer.write(frame); + writer.open(outputFile, VideoWriter::fourcc('X', 'V', 'I', 'D'), fps, 
frameBgr.size()); + startTimer(); + writer.write(frameBgr); stopTimer(); } + reader >> frameBgr; + ASSERT_FALSE(frameBgr.empty()); + startTimer(); + writer.write(frameBgr); + writer.release(); + stopTimer(); + + ASSERT_EQ(0, remove(outputFile.c_str())); } - - SANITY_CHECK(frame); + SANITY_CHECK(frameBgr); } +#endif #endif }} // namespace diff --git a/modules/cudacodec/src/NvEncoder.cpp b/modules/cudacodec/src/NvEncoder.cpp new file mode 100644 index 00000000000..78b76c78e50 --- /dev/null +++ b/modules/cudacodec/src/NvEncoder.cpp @@ -0,0 +1,786 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#include "precomp.hpp" + +#if defined(HAVE_NVCUVENC) +#include "NvEncoder.h" + +namespace cv { namespace cudacodec { +#ifndef _WIN32 +#include +static inline bool operator==(const GUID& guid1, const GUID& guid2) { + return !memcmp(&guid1, &guid2, sizeof(GUID)); +} + +static inline bool operator!=(const GUID& guid1, const GUID& guid2) { + return !(guid1 == guid2); +} +#endif + +NvEncoder::NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void* pDevice, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat, + uint32_t nExtraOutputDelay) : + m_hEncoder(nullptr), + m_pDevice(pDevice), + m_eDeviceType(eDeviceType), + m_nWidth(nWidth), + m_nHeight(nHeight), + m_nMaxEncodeWidth(nWidth), + m_nMaxEncodeHeight(nHeight), + m_eBufferFormat(eBufferFormat), + m_nExtraOutputDelay(nExtraOutputDelay) +{ + LoadNvEncApi(); + + if (!m_nvenc.nvEncOpenEncodeSession) + { + m_nEncoderBuffer = 0; + NVENC_THROW_ERROR("EncodeAPI not found", NV_ENC_ERR_NO_ENCODE_DEVICE); + } + + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encodeSessionExParams = {}; + encodeSessionExParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; + encodeSessionExParams.device = m_pDevice; + encodeSessionExParams.deviceType = m_eDeviceType; + encodeSessionExParams.apiVersion = NVENCAPI_VERSION; + void* hEncoder = NULL; + NVENC_API_CALL(m_nvenc.nvEncOpenEncodeSessionEx(&encodeSessionExParams, &hEncoder)); + m_hEncoder = hEncoder; +} + +void NvEncoder::LoadNvEncApi() +{ + + uint32_t version = 0; + uint32_t currentVersion = (NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION; + NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version)); + if (currentVersion > version) + { + NVENC_THROW_ERROR("Current Driver Version does not support this NvEncodeAPI version, please upgrade driver", NV_ENC_ERR_INVALID_VERSION); + } + + m_nvenc = {}; + m_nvenc.version = NV_ENCODE_API_FUNCTION_LIST_VER; + NVENC_API_CALL(NvEncodeAPICreateInstance(&m_nvenc)); +} + +NvEncoder::~NvEncoder() +{ + DestroyHWEncoder(); +} + +void NvEncoder::CreateDefaultEncoderParams(NV_ENC_INITIALIZE_PARAMS* pIntializeParams, GUID codecGuid, GUID presetGuid, NV_ENC_TUNING_INFO tuningInfo) +{ + if (!m_hEncoder) + { + NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_NO_ENCODE_DEVICE); + return; + } + + if (pIntializeParams == nullptr || pIntializeParams->encodeConfig == nullptr) + { + NVENC_THROW_ERROR("pInitializeParams and pInitializeParams->encodeConfig can't be NULL", NV_ENC_ERR_INVALID_PTR); + } + + memset(pIntializeParams->encodeConfig, 0, sizeof(NV_ENC_CONFIG)); + auto pEncodeConfig = pIntializeParams->encodeConfig; + memset(pIntializeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS)); + pIntializeParams->encodeConfig = pEncodeConfig; + + + pIntializeParams->encodeConfig->version = NV_ENC_CONFIG_VER; + pIntializeParams->version = 
NV_ENC_INITIALIZE_PARAMS_VER; + + pIntializeParams->encodeGUID = codecGuid; + pIntializeParams->presetGUID = presetGuid; + pIntializeParams->encodeWidth = m_nWidth; + pIntializeParams->encodeHeight = m_nHeight; + pIntializeParams->darWidth = m_nWidth; + pIntializeParams->darHeight = m_nHeight; + pIntializeParams->frameRateNum = 30; + pIntializeParams->frameRateDen = 1; + pIntializeParams->enablePTD = 1; + pIntializeParams->reportSliceOffsets = 0; + pIntializeParams->enableSubFrameWrite = 0; + pIntializeParams->maxEncodeWidth = m_nWidth; + pIntializeParams->maxEncodeHeight = m_nHeight; + pIntializeParams->enableMEOnlyMode = false; + pIntializeParams->enableOutputInVidmem = false; +#if defined(_WIN32) + pIntializeParams->enableEncodeAsync = GetCapabilityValue(codecGuid, NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT); +#endif + pIntializeParams->tuningInfo = tuningInfo; + NV_ENC_PRESET_CONFIG presetConfig = {}; + presetConfig.version = NV_ENC_PRESET_CONFIG_VER; + presetConfig.presetCfg.version = NV_ENC_CONFIG_VER; + m_nvenc.nvEncGetEncodePresetConfigEx(m_hEncoder, codecGuid, presetGuid, tuningInfo, &presetConfig); + memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG)); + + if (pIntializeParams->encodeGUID == NV_ENC_CODEC_H264_GUID) + { + if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) + { + pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.chromaFormatIDC = 3; + } + pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.idrPeriod = pIntializeParams->encodeConfig->gopLength; + } + else if (pIntializeParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) + { + pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 = + (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) ? 
2 : 0; + if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) + { + pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC = 3; + } + pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.idrPeriod = pIntializeParams->encodeConfig->gopLength; + } + + return; +} + +void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncoderParams) +{ + if (!m_hEncoder) + { + NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_NO_ENCODE_DEVICE); + } + + if (!pEncoderParams) + { + NVENC_THROW_ERROR("Invalid NV_ENC_INITIALIZE_PARAMS ptr", NV_ENC_ERR_INVALID_PTR); + } + + if (pEncoderParams->encodeWidth == 0 || pEncoderParams->encodeHeight == 0) + { + NVENC_THROW_ERROR("Invalid encoder width and height", NV_ENC_ERR_INVALID_PARAM); + } + + if (pEncoderParams->encodeGUID != NV_ENC_CODEC_H264_GUID && pEncoderParams->encodeGUID != NV_ENC_CODEC_HEVC_GUID) + { + NVENC_THROW_ERROR("Invalid codec guid", NV_ENC_ERR_INVALID_PARAM); + } + + if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID) + { + if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) + { + NVENC_THROW_ERROR("10-bit format isn't supported by H264 encoder", NV_ENC_ERR_INVALID_PARAM); + } + } + + // set other necessary params if not set yet + if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID) + { + if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444) && + (pEncoderParams->encodeConfig->encodeCodecConfig.h264Config.chromaFormatIDC != 3)) + { + NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM); + } + } + + if (pEncoderParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) + { + bool yuv10BitFormat = (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) ? 
true : false; + if (yuv10BitFormat && pEncoderParams->encodeConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 != 2) + { + NVENC_THROW_ERROR("Invalid PixelBitdepth", NV_ENC_ERR_INVALID_PARAM); + } + + if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 || m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) && + (pEncoderParams->encodeConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC != 3)) + { + NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM); + } + } + + memcpy(&m_initializeParams, pEncoderParams, sizeof(m_initializeParams)); + m_initializeParams.version = NV_ENC_INITIALIZE_PARAMS_VER; + + if (pEncoderParams->encodeConfig) + { + memcpy(&m_encodeConfig, pEncoderParams->encodeConfig, sizeof(m_encodeConfig)); + m_encodeConfig.version = NV_ENC_CONFIG_VER; + } + else + { + NV_ENC_PRESET_CONFIG presetConfig = {}; + presetConfig.version = NV_ENC_PRESET_CONFIG_VER; + presetConfig.presetCfg.version = NV_ENC_CONFIG_VER; + m_nvenc.nvEncGetEncodePresetConfigEx(m_hEncoder, pEncoderParams->encodeGUID, pEncoderParams->presetGUID, pEncoderParams->tuningInfo, &presetConfig); + memcpy(&m_encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG)); + } + m_initializeParams.encodeConfig = &m_encodeConfig; + NVENC_API_CALL(m_nvenc.nvEncInitializeEncoder(m_hEncoder, &m_initializeParams)); + m_bEncoderInitialized = true; + m_nWidth = m_initializeParams.encodeWidth; + m_nHeight = m_initializeParams.encodeHeight; + m_nMaxEncodeWidth = m_initializeParams.maxEncodeWidth; + m_nMaxEncodeHeight = m_initializeParams.maxEncodeHeight; + + m_nEncoderBuffer = m_encodeConfig.frameIntervalP + m_encodeConfig.rcParams.lookaheadDepth + m_nExtraOutputDelay; + m_nOutputDelay = m_nEncoderBuffer - 1; + + m_vpCompletionEvent.resize(m_nEncoderBuffer, nullptr); + +#if defined(_WIN32) + for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++) + { + m_vpCompletionEvent[i] = CreateEvent(NULL, FALSE, FALSE, NULL); + NV_ENC_EVENT_PARAMS eventParams = { NV_ENC_EVENT_PARAMS_VER }; + eventParams.completionEvent = m_vpCompletionEvent[i]; + m_nvenc.nvEncRegisterAsyncEvent(m_hEncoder, &eventParams); + } +#endif + + m_vMappedInputBuffers.resize(m_nEncoderBuffer, nullptr); + m_vBitstreamOutputBuffer.resize(m_nEncoderBuffer, nullptr); + InitializeBitstreamBuffer(); + AllocateInputBuffers(m_nEncoderBuffer); +} + +void NvEncoder::DestroyEncoder() +{ + if (!m_hEncoder) + { + return; + } + + ReleaseInputBuffers(); + + DestroyHWEncoder(); +} + +void NvEncoder::DestroyHWEncoder() +{ + if (!m_hEncoder) + { + return; + } + +#if defined(_WIN32) + for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++) + { + if (m_vpCompletionEvent[i]) + { + NV_ENC_EVENT_PARAMS eventParams = { NV_ENC_EVENT_PARAMS_VER }; + eventParams.completionEvent = m_vpCompletionEvent[i]; + m_nvenc.nvEncUnregisterAsyncEvent(m_hEncoder, &eventParams); + CloseHandle(m_vpCompletionEvent[i]); + } + } + m_vpCompletionEvent.clear(); +#endif + + DestroyBitstreamBuffer(); + + m_nvenc.nvEncDestroyEncoder(m_hEncoder); + + m_hEncoder = nullptr; + + m_bEncoderInitialized = false; +} + +const NvEncInputFrame* NvEncoder::GetNextInputFrame() +{ + int i = m_iToSend % m_nEncoderBuffer; + return &m_vInputFrames[i]; +} + +void NvEncoder::MapResources(uint32_t bfrIdx) +{ + NV_ENC_MAP_INPUT_RESOURCE mapInputResource = {}; + mapInputResource.version = NV_ENC_MAP_INPUT_RESOURCE_VER; + mapInputResource.registeredResource = m_vRegisteredResources[bfrIdx]; + NVENC_API_CALL(m_nvenc.nvEncMapInputResource(m_hEncoder, &mapInputResource)); + m_vMappedInputBuffers[bfrIdx] = 
mapInputResource.mappedResource; +} + +void NvEncoder::EncodeFrame(std::vector>& vPacket, NV_ENC_PIC_PARAMS* pPicParams) +{ + vPacket.clear(); + if (!IsHWEncoderInitialized()) + { + NVENC_THROW_ERROR("Encoder device not found", NV_ENC_ERR_NO_ENCODE_DEVICE); + } + + int bfrIdx = m_iToSend % m_nEncoderBuffer; + + MapResources(bfrIdx); + + NVENCSTATUS nvStatus = DoEncode(m_vMappedInputBuffers[bfrIdx], m_vBitstreamOutputBuffer[bfrIdx], pPicParams); + + if (nvStatus == NV_ENC_SUCCESS || nvStatus == NV_ENC_ERR_NEED_MORE_INPUT) + { + m_iToSend++; + GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, true); + } + else + { + NVENC_THROW_ERROR("nvEncEncodePicture API failed", nvStatus); + } +} + +void NvEncoder::GetSequenceParams(std::vector& seqParams) +{ + uint8_t spsppsData[1024]; // Assume maximum spspps data is 1KB or less + memset(spsppsData, 0, sizeof(spsppsData)); + NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = {}; + payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER; + uint32_t spsppsSize = 0; + + payload.spsppsBuffer = spsppsData; + payload.inBufferSize = sizeof(spsppsData); + payload.outSPSPPSPayloadSize = &spsppsSize; + NVENC_API_CALL(m_nvenc.nvEncGetSequenceParams(m_hEncoder, &payload)); + seqParams.clear(); + seqParams.insert(seqParams.end(), &spsppsData[0], &spsppsData[spsppsSize]); +} + +NVENCSTATUS NvEncoder::DoEncode(NV_ENC_INPUT_PTR inputBuffer, NV_ENC_OUTPUT_PTR outputBuffer, NV_ENC_PIC_PARAMS* pPicParams) +{ + NV_ENC_PIC_PARAMS picParams = {}; + if (pPicParams) + { + picParams = *pPicParams; + } + picParams.version = NV_ENC_PIC_PARAMS_VER; + picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; + picParams.inputBuffer = inputBuffer; + picParams.bufferFmt = GetPixelFormat(); + picParams.inputWidth = GetEncodeWidth(); + picParams.inputHeight = GetEncodeHeight(); + picParams.outputBitstream = outputBuffer; + picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer); + NVENCSTATUS nvStatus = m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams); + + return nvStatus; +} + +void NvEncoder::SendEOS() +{ + NV_ENC_PIC_PARAMS picParams = {}; + picParams.version = NV_ENC_PIC_PARAMS_VER; + + picParams.encodePicFlags = NV_ENC_PIC_FLAG_EOS; + picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer); + NVENC_API_CALL(m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams)); +} + +void NvEncoder::EndEncode(std::vector>& vPacket) +{ + vPacket.clear(); + if (!IsHWEncoderInitialized()) + { + NVENC_THROW_ERROR("Encoder device not initialized", NV_ENC_ERR_ENCODER_NOT_INITIALIZED); + } + + SendEOS(); + + GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, false); +} + +void NvEncoder::GetEncodedPacket(std::vector& vOutputBuffer, std::vector>& vPacket, bool bOutputDelay) +{ + unsigned i = 0; + int iEnd = bOutputDelay ? 
m_iToSend - m_nOutputDelay : m_iToSend; + for (; m_iGot < iEnd; m_iGot++) + { + WaitForCompletionEvent(m_iGot % m_nEncoderBuffer); + NV_ENC_LOCK_BITSTREAM lockBitstreamData = {}; + lockBitstreamData.version = NV_ENC_LOCK_BITSTREAM_VER; + lockBitstreamData.outputBitstream = vOutputBuffer[m_iGot % m_nEncoderBuffer]; + lockBitstreamData.doNotWait = false; + NVENC_API_CALL(m_nvenc.nvEncLockBitstream(m_hEncoder, &lockBitstreamData)); + + uint8_t* pData = (uint8_t*)lockBitstreamData.bitstreamBufferPtr; + if (vPacket.size() < i + 1) + { + vPacket.push_back(std::vector()); + } + vPacket[i].clear(); + vPacket[i].insert(vPacket[i].end(), &pData[0], &pData[lockBitstreamData.bitstreamSizeInBytes]); + i++; + + NVENC_API_CALL(m_nvenc.nvEncUnlockBitstream(m_hEncoder, lockBitstreamData.outputBitstream)); + + if (m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer]) + { + NVENC_API_CALL(m_nvenc.nvEncUnmapInputResource(m_hEncoder, m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer])); + m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer] = nullptr; + } + } +} + +bool NvEncoder::Reconfigure(const NV_ENC_RECONFIGURE_PARAMS* pReconfigureParams) +{ + NVENC_API_CALL(m_nvenc.nvEncReconfigureEncoder(m_hEncoder, const_cast(pReconfigureParams))); + + memcpy(&m_initializeParams, &(pReconfigureParams->reInitEncodeParams), sizeof(m_initializeParams)); + if (pReconfigureParams->reInitEncodeParams.encodeConfig) + { + memcpy(&m_encodeConfig, pReconfigureParams->reInitEncodeParams.encodeConfig, sizeof(m_encodeConfig)); + } + + m_nWidth = m_initializeParams.encodeWidth; + m_nHeight = m_initializeParams.encodeHeight; + m_nMaxEncodeWidth = m_initializeParams.maxEncodeWidth; + m_nMaxEncodeHeight = m_initializeParams.maxEncodeHeight; + + return true; +} + +NV_ENC_REGISTERED_PTR NvEncoder::RegisterResource(void* pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, + int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, NV_ENC_BUFFER_USAGE bufferUsage, + NV_ENC_FENCE_POINT_D3D12* pInputFencePoint, NV_ENC_FENCE_POINT_D3D12* pOutputFencePoint) +{ + NV_ENC_REGISTER_RESOURCE registerResource = {}; + registerResource.version = NV_ENC_REGISTER_RESOURCE_VER; + registerResource.resourceType = eResourceType; + registerResource.resourceToRegister = pBuffer; + registerResource.width = width; + registerResource.height = height; + registerResource.pitch = pitch; + registerResource.bufferFormat = bufferFormat; + registerResource.bufferUsage = bufferUsage; + registerResource.pInputFencePoint = pInputFencePoint; + registerResource.pOutputFencePoint = pOutputFencePoint; + NVENC_API_CALL(m_nvenc.nvEncRegisterResource(m_hEncoder, ®isterResource)); + + return registerResource.registeredResource; +} + +void NvEncoder::RegisterInputResources(std::vector inputframes, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, + int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, bool bReferenceFrame) +{ + for (uint32_t i = 0; i < inputframes.size(); ++i) + { + NV_ENC_REGISTERED_PTR registeredPtr = RegisterResource(inputframes[i], eResourceType, width, height, pitch, bufferFormat, NV_ENC_INPUT_IMAGE); + + std::vector _chromaOffsets; + NvEncoder::GetChromaSubPlaneOffsets(bufferFormat, pitch, height, _chromaOffsets); + NvEncInputFrame inputframe = {}; + inputframe.inputPtr = (void*)inputframes[i]; + inputframe.chromaOffsets[0] = 0; + inputframe.chromaOffsets[1] = 0; + for (uint32_t ch = 0; ch < _chromaOffsets.size(); ch++) + { + inputframe.chromaOffsets[ch] = _chromaOffsets[ch]; + } + inputframe.numChromaPlanes = 
NvEncoder::GetNumChromaPlanes(bufferFormat); + inputframe.pitch = pitch; + inputframe.chromaPitch = NvEncoder::GetChromaPitch(bufferFormat, pitch); + inputframe.bufferFormat = bufferFormat; + inputframe.resourceType = eResourceType; + + if (bReferenceFrame) + { + m_vRegisteredResourcesForReference.push_back(registeredPtr); + m_vReferenceFrames.push_back(inputframe); + } + else + { + m_vRegisteredResources.push_back(registeredPtr); + m_vInputFrames.push_back(inputframe); + } + } +} + +void NvEncoder::FlushEncoder() +{ + try + { + std::vector> vPacket; + EndEncode(vPacket); + } + catch (...) + { + + } +} + +void NvEncoder::UnregisterInputResources() +{ + FlushEncoder(); + + m_vMappedRefBuffers.clear(); + + for (uint32_t i = 0; i < m_vMappedInputBuffers.size(); ++i) + { + if (m_vMappedInputBuffers[i]) + { + m_nvenc.nvEncUnmapInputResource(m_hEncoder, m_vMappedInputBuffers[i]); + } + } + m_vMappedInputBuffers.clear(); + + for (uint32_t i = 0; i < m_vRegisteredResources.size(); ++i) + { + if (m_vRegisteredResources[i]) + { + m_nvenc.nvEncUnregisterResource(m_hEncoder, m_vRegisteredResources[i]); + } + } + m_vRegisteredResources.clear(); + + + for (uint32_t i = 0; i < m_vRegisteredResourcesForReference.size(); ++i) + { + if (m_vRegisteredResourcesForReference[i]) + { + m_nvenc.nvEncUnregisterResource(m_hEncoder, m_vRegisteredResourcesForReference[i]); + } + } + m_vRegisteredResourcesForReference.clear(); + +} + + +void NvEncoder::WaitForCompletionEvent(int iEvent) +{ +#if defined(_WIN32) + // Check if we are in async mode. If not, don't wait for event; + NV_ENC_CONFIG sEncodeConfig = { 0 }; + NV_ENC_INITIALIZE_PARAMS sInitializeParams = { 0 }; + sInitializeParams.encodeConfig = &sEncodeConfig; + GetInitializeParams(&sInitializeParams); + + if (0U == sInitializeParams.enableEncodeAsync) + { + return; + } +#ifdef DEBUG + WaitForSingleObject(m_vpCompletionEvent[iEvent], INFINITE); +#else + // wait for 20s which is infinite on terms of gpu time + if (WaitForSingleObject(m_vpCompletionEvent[iEvent], 20000) == WAIT_FAILED) + { + NVENC_THROW_ERROR("Failed to encode frame", NV_ENC_ERR_GENERIC); + } +#endif +#endif +} + +uint32_t NvEncoder::GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t width) +{ + switch (bufferFormat) { + case NV_ENC_BUFFER_FORMAT_NV12: + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + case NV_ENC_BUFFER_FORMAT_YUV444: + return width; + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return width * 2; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return width * 4; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + } +} + +uint32_t NvEncoder::GetNumChromaPlanes(const NV_ENC_BUFFER_FORMAT bufferFormat) +{ + switch (bufferFormat) + { + case NV_ENC_BUFFER_FORMAT_NV12: + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + return 1; + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + case NV_ENC_BUFFER_FORMAT_YUV444: + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return 2; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return 0; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + } +} + +uint32_t NvEncoder::GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaPitch) 
+{ + switch (bufferFormat) + { + case NV_ENC_BUFFER_FORMAT_NV12: + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + case NV_ENC_BUFFER_FORMAT_YUV444: + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return lumaPitch; + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + return (lumaPitch + 1) / 2; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return 0; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + } +} + +void NvEncoder::GetChromaSubPlaneOffsets(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t pitch, const uint32_t height, std::vector& chromaOffsets) +{ + chromaOffsets.clear(); + switch (bufferFormat) + { + case NV_ENC_BUFFER_FORMAT_NV12: + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + chromaOffsets.push_back(pitch * height); + return; + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + chromaOffsets.push_back(pitch * height); + chromaOffsets.push_back(chromaOffsets[0] + (NvEncoder::GetChromaPitch(bufferFormat, pitch) * GetChromaHeight(bufferFormat, height))); + return; + case NV_ENC_BUFFER_FORMAT_YUV444: + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + chromaOffsets.push_back(pitch * height); + chromaOffsets.push_back(chromaOffsets[0] + (pitch * height)); + return; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + } +} + +uint32_t NvEncoder::GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaHeight) +{ + switch (bufferFormat) + { + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + case NV_ENC_BUFFER_FORMAT_NV12: + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + return (lumaHeight + 1) / 2; + case NV_ENC_BUFFER_FORMAT_YUV444: + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return lumaHeight; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return 0; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + } +} + +uint32_t NvEncoder::GetChromaWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaWidth) +{ + switch (bufferFormat) + { + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + return (lumaWidth + 1) / 2; + case NV_ENC_BUFFER_FORMAT_NV12: + return lumaWidth; + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + return 2 * lumaWidth; + case NV_ENC_BUFFER_FORMAT_YUV444: + return lumaWidth; + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return 2 * lumaWidth; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return 0; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + } +} + + +int NvEncoder::GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery) +{ + if (!m_hEncoder) + { + return 0; + } + NV_ENC_CAPS_PARAM capsParam = {}; + capsParam.version = NV_ENC_CAPS_PARAM_VER; + capsParam.capsToQuery = capsToQuery; + int v; + m_nvenc.nvEncGetEncodeCaps(m_hEncoder, guidCodec, &capsParam, &v); + return v; +} + +int NvEncoder::GetFrameSize() const +{ + switch (GetPixelFormat()) + { + case NV_ENC_BUFFER_FORMAT_YV12: + case 
NV_ENC_BUFFER_FORMAT_IYUV: + case NV_ENC_BUFFER_FORMAT_NV12: + return GetEncodeWidth() * (GetEncodeHeight() + (GetEncodeHeight() + 1) / 2); + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + return 2 * GetEncodeWidth() * (GetEncodeHeight() + (GetEncodeHeight() + 1) / 2); + case NV_ENC_BUFFER_FORMAT_YUV444: + return GetEncodeWidth() * GetEncodeHeight() * 3; + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return 2 * GetEncodeWidth() * GetEncodeHeight() * 3; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return 4 * GetEncodeWidth() * GetEncodeHeight(); + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + } +} + +void NvEncoder::GetInitializeParams(NV_ENC_INITIALIZE_PARAMS* pInitializeParams) +{ + if (!pInitializeParams || !pInitializeParams->encodeConfig) + { + NVENC_THROW_ERROR("Both pInitializeParams and pInitializeParams->encodeConfig can't be NULL", NV_ENC_ERR_INVALID_PTR); + } + NV_ENC_CONFIG* pEncodeConfig = pInitializeParams->encodeConfig; + *pEncodeConfig = m_encodeConfig; + *pInitializeParams = m_initializeParams; + pInitializeParams->encodeConfig = pEncodeConfig; +} + +void NvEncoder::InitializeBitstreamBuffer() +{ + for (int i = 0; i < m_nEncoderBuffer; i++) + { + NV_ENC_CREATE_BITSTREAM_BUFFER createBitstreamBuffer = {}; + createBitstreamBuffer.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER; + NVENC_API_CALL(m_nvenc.nvEncCreateBitstreamBuffer(m_hEncoder, &createBitstreamBuffer)); + m_vBitstreamOutputBuffer[i] = createBitstreamBuffer.bitstreamBuffer; + } +} + +void NvEncoder::DestroyBitstreamBuffer() +{ + for (uint32_t i = 0; i < m_vBitstreamOutputBuffer.size(); i++) + { + if (m_vBitstreamOutputBuffer[i]) + { + m_nvenc.nvEncDestroyBitstreamBuffer(m_hEncoder, m_vBitstreamOutputBuffer[i]); + } + } + + m_vBitstreamOutputBuffer.clear(); +} +}} +#endif \ No newline at end of file diff --git a/modules/cudacodec/src/NvEncoder.h b/modules/cudacodec/src/NvEncoder.h new file mode 100644 index 00000000000..c8c281e95a4 --- /dev/null +++ b/modules/cudacodec/src/NvEncoder.h @@ -0,0 +1,377 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_NVENCODER_HPP +#define OPENCV_NVENCODER_HPP +#include +#include "nvEncodeAPI.h" +#include +#include +#include +#include +#include +#include + +namespace cv { namespace cudacodec { + +#define NVENC_THROW_ERROR( errorStr, errorCode ) \ +do \ +{ \ +cv::String msg = cv::format("%s [Code = %d]", errorStr, errorCode); \ +cv::error(cv::Error::GpuApiCallError, msg, __FUNCTION__, __FILE__, __LINE__); \ +} while (0) + + +#define NVENC_API_CALL( nvencAPI ) \ +do \ +{ \ +NVENCSTATUS errorCode = nvencAPI; \ +if( errorCode != NV_ENC_SUCCESS) \ +{ \ +cv::String msg = cv::format("NVENC returned error [Code = %d]", errorCode); \ +cv::error(cv::Error::GpuApiCallError, msg, __FUNCTION__, __FILE__, __LINE__); \ +} \ +} while (0) + +struct NvEncInputFrame +{ + void* inputPtr = nullptr; + uint32_t chromaOffsets[2]; + uint32_t numChromaPlanes; + uint32_t pitch; + uint32_t chromaPitch; + NV_ENC_BUFFER_FORMAT bufferFormat; + NV_ENC_INPUT_RESOURCE_TYPE resourceType; +}; + +/** +* @brief Shared base class for different encoder interfaces. +*/ +class NvEncoder +{ +public: + /** + * @brief This function is used to initialize the encoder session. 
+ * Application must call this function to initialize the encoder, before + * starting to encode any frames. + */ + virtual void CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncodeParams); + + /** + * @brief This function is used to destroy the encoder session. + * Application must call this function to destroy the encoder session and + * clean up any allocated resources. The application must call EndEncode() + * function to get any queued encoded frames before calling DestroyEncoder(). + */ + virtual void DestroyEncoder(); + + /** + * @brief This function is used to reconfigure an existing encoder session. + * Application can use this function to dynamically change the bitrate, + * resolution and other QOS parameters. If the application changes the + * resolution, it must set NV_ENC_RECONFIGURE_PARAMS::forceIDR. + */ + bool Reconfigure(const NV_ENC_RECONFIGURE_PARAMS* pReconfigureParams); + + /** + * @brief This function is used to get the next available input buffer. + * Applications must call this function to obtain a pointer to the next + * input buffer. The application must copy the uncompressed data to the + * input buffer and then call EncodeFrame() function to encode it. + */ + const NvEncInputFrame* GetNextInputFrame(); + + + /** + * @brief This function is used to encode a frame. + * Applications must call EncodeFrame() function to encode the uncompressed + * data, which has been copied to an input buffer obtained from the + * GetNextInputFrame() function. + */ + virtual void EncodeFrame(std::vector>& vPacket, NV_ENC_PIC_PARAMS* pPicParams = nullptr); + + /** + * @brief This function to flush the encoder queue. + * The encoder might be queuing frames for B picture encoding or lookahead; + * the application must call EndEncode() to get all the queued encoded frames + * from the encoder. The application must call this function before destroying + * an encoder session. + */ + virtual void EndEncode(std::vector>& vPacket); + + /** + * @brief This function is used to query hardware encoder capabilities. + * Applications can call this function to query capabilities like maximum encode + * dimensions, support for lookahead or the ME-only mode etc. + */ + int GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery); + + /** + * @brief This function is used to get the current device on which encoder is running. + */ + void* GetDevice() const { return m_pDevice; } + + /** + * @brief This function is used to get the current device type which encoder is running. + */ + NV_ENC_DEVICE_TYPE GetDeviceType() const { return m_eDeviceType; } + + /** + * @brief This function is used to get the current encode width. + * The encode width can be modified by Reconfigure() function. + */ + int GetEncodeWidth() const { return m_nWidth; } + + /** + * @brief This function is used to get the current encode height. + * The encode height can be modified by Reconfigure() function. + */ + int GetEncodeHeight() const { return m_nHeight; } + + /** + * @brief This function is used to get the current frame size based on pixel format. + */ + int GetFrameSize() const; + + /** + * @brief This function is used to initialize config parameters based on + * given codec and preset guids. + * The application can call this function to get the default configuration + * for a certain preset. The application can either use these parameters + * directly or override them with application-specific settings before + * using them in CreateEncoder() function. 
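A simplified sketch of the call sequence these methods describe; in the module it is driven by the cudacodec::VideoWriter implementation. encodeLoopSketch and copyFrameToInput are hypothetical names, and the codec GUID, preset, tuning info, buffer format and output delay are arbitrary examples:

    #include "NvEncoderCuda.h"
    #include <cstdint>
    #include <vector>

    void encodeLoopSketch(CUcontext ctx, uint32_t width, uint32_t height, int nFrames)
    {
        cv::cudacodec::NvEncoderCuda enc(ctx, width, height, NV_ENC_BUFFER_FORMAT_NV12, 3);

        NV_ENC_INITIALIZE_PARAMS initParams = { NV_ENC_INITIALIZE_PARAMS_VER };
        NV_ENC_CONFIG encodeConfig = { NV_ENC_CONFIG_VER };
        initParams.encodeConfig = &encodeConfig;
        enc.CreateDefaultEncoderParams(&initParams, NV_ENC_CODEC_H264_GUID,
                                       NV_ENC_PRESET_P3_GUID, NV_ENC_TUNING_INFO_HIGH_QUALITY);
        enc.CreateEncoder(&initParams);

        std::vector<std::vector<uint8_t>> packets;
        for (int i = 0; i < nFrames; i++) {
            const cv::cudacodec::NvEncInputFrame* inputFrame = enc.GetNextInputFrame();
            // copyFrameToInput(inputFrame, ...);  // hypothetical: copy the raw frame into inputFrame->inputPtr (e.g. cuMemcpy2D)
            enc.EncodeFrame(packets);              // may return no packets while the encoder is still buffering
            // ... hand any returned packets to the bitstream consumer ...
        }
        enc.EndEncode(packets);                    // flush frames still queued inside the encoder
        enc.DestroyEncoder();
    }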
+ */ + void CreateDefaultEncoderParams(NV_ENC_INITIALIZE_PARAMS* pIntializeParams, GUID codecGuid, GUID presetGuid, NV_ENC_TUNING_INFO tuningInfo = NV_ENC_TUNING_INFO_UNDEFINED); + + /** + * @brief This function is used to get the current initialization parameters, + * which had been used to configure the encoder session. + * The initialization parameters are modified if the application calls + * Reconfigure() function. + */ + void GetInitializeParams(NV_ENC_INITIALIZE_PARAMS* pInitializeParams); + + /** + * @brief This function is used to get sequence and picture parameter headers. + * Application can call this function after encoder is initialized to get SPS and PPS + * nalus for the current encoder instance. The sequence header data might change when + * application calls Reconfigure() function. + */ + void GetSequenceParams(std::vector& seqParams); + + /** + * @brief NvEncoder class virtual destructor. + */ + virtual ~NvEncoder(); + +public: + /** + * @brief This a static function to get chroma offsets for YUV planar formats. + */ + static void GetChromaSubPlaneOffsets(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t pitch, + const uint32_t height, std::vector& chromaOffsets); + /** + * @brief This a static function to get the chroma plane pitch for YUV planar formats. + */ + static uint32_t GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaPitch); + + /** + * @brief This a static function to get the number of chroma planes for YUV planar formats. + */ + static uint32_t GetNumChromaPlanes(const NV_ENC_BUFFER_FORMAT bufferFormat); + + /** + * @brief This a static function to get the chroma plane width in bytes for YUV planar formats. + */ + static uint32_t GetChromaWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaWidth); + + /** + * @brief This a static function to get the chroma planes height in bytes for YUV planar formats. + */ + static uint32_t GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaHeight); + + + /** + * @brief This a static function to get the width in bytes for the frame. + * For YUV planar format this is the width in bytes of the luma plane. + */ + static uint32_t GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t width); + + /** + * @brief This function returns the number of allocated buffers. + */ + uint32_t GetEncoderBufferCount() const { return m_nEncoderBuffer; } +protected: + + /** + * @brief NvEncoder class constructor. + * NvEncoder class constructor cannot be called directly by the application. + */ + NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void* pDevice, uint32_t nWidth, uint32_t nHeight, + NV_ENC_BUFFER_FORMAT eBufferFormat, uint32_t nOutputDelay); + + /** + * @brief This function is used to check if hardware encoder is properly initialized. + */ + bool IsHWEncoderInitialized() const { return m_hEncoder != NULL && m_bEncoderInitialized; } + + /** + * @brief This function is used to register CUDA, D3D or OpenGL input buffers with NvEncodeAPI. + * This is non public function and is called by derived class for allocating + * and registering input buffers. + */ + void RegisterInputResources(std::vector inputframes, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, + int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, bool bReferenceFrame = false); + + /** + * @brief This function is used to unregister resources which had been previously registered for encoding + * using RegisterInputResources() function. 
+ */ + void UnregisterInputResources(); + + /** + * @brief This function is used to register CUDA, D3D or OpenGL input or output buffers with NvEncodeAPI. + */ + NV_ENC_REGISTERED_PTR RegisterResource(void* pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, + int width, int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat, NV_ENC_BUFFER_USAGE bufferUsage = NV_ENC_INPUT_IMAGE, + NV_ENC_FENCE_POINT_D3D12* pInputFencePoint = NULL, NV_ENC_FENCE_POINT_D3D12* pOutputFencePoint = NULL); + + /** + * @brief This function returns maximum width used to open the encoder session. + * All encode input buffers are allocated using maximum dimensions. + */ + uint32_t GetMaxEncodeWidth() const { return m_nMaxEncodeWidth; } + + /** + * @brief This function returns maximum height used to open the encoder session. + * All encode input buffers are allocated using maximum dimensions. + */ + uint32_t GetMaxEncodeHeight() const { return m_nMaxEncodeHeight; } + + /** + * @brief This function returns the completion event. + */ + void* GetCompletionEvent(uint32_t eventIdx) { return (m_vpCompletionEvent.size() == m_nEncoderBuffer) ? m_vpCompletionEvent[eventIdx] : nullptr; } + + /** + * @brief This function returns the current pixel format. + */ + NV_ENC_BUFFER_FORMAT GetPixelFormat() const { return m_eBufferFormat; } + + /** + * @brief This function is used to submit the encode commands to the + * NVENC hardware. + */ + NVENCSTATUS DoEncode(NV_ENC_INPUT_PTR inputBuffer, NV_ENC_OUTPUT_PTR outputBuffer, NV_ENC_PIC_PARAMS* pPicParams); + + /** + * @brief This function is used to submit the encode commands to the + * NVENC hardware for ME only mode. + */ + //NVENCSTATUS DoMotionEstimation(NV_ENC_INPUT_PTR inputBuffer, NV_ENC_INPUT_PTR inputBufferForReference, NV_ENC_OUTPUT_PTR outputBuffer); + + /** + * @brief This function is used to map the input buffers to NvEncodeAPI. + */ + void MapResources(uint32_t bfrIdx); + + /** + * @brief This function is used to wait for completion of encode command. + */ + void WaitForCompletionEvent(int iEvent); + + /** + * @brief This function is used to send EOS to HW encoder. + */ + void SendEOS(); + +private: + /** + * @brief This is a private function which is used to check if there is any + buffering done by encoder. + * The encoder generally buffers data to encode B frames or for lookahead + * or pipelining. + */ + bool IsZeroDelay() { return m_nOutputDelay == 0; } + + /** + * @brief This is a private function which is used to load the encode api shared library. + */ + void LoadNvEncApi(); + + /** + * @brief This is a private function which is used to get the output packets + * from the encoder HW. + * This is called by DoEncode() function. If there is buffering enabled, + * this may return without any output data. + */ + void GetEncodedPacket(std::vector& vOutputBuffer, std::vector>& vPacket, bool bOutputDelay); + + /** + * @brief This is a private function which is used to initialize the bitstream buffers. + * This is only used in the encoding mode. + */ + void InitializeBitstreamBuffer(); + + /** + * @brief This is a private function which is used to destroy the bitstream buffers. + * This is only used in the encoding mode. + */ + void DestroyBitstreamBuffer(); + + /** + * @brief This is a private function which is used to destroy HW encoder. + */ + void DestroyHWEncoder(); + + /** + * @brief This function is used to flush the encoder queue. + */ + void FlushEncoder(); + +private: + /** + * @brief This is a pure virtual function which is used to allocate input buffers. 
+     * The derived classes must implement this function.
+     */
+    virtual void AllocateInputBuffers(int32_t numInputBuffers) = 0;
+
+    /**
+     * @brief This is a pure virtual function which is used to destroy input buffers.
+     * The derived classes must implement this function.
+     */
+    virtual void ReleaseInputBuffers() = 0;
+
+protected:
+    void* m_hEncoder = nullptr;
+    NV_ENCODE_API_FUNCTION_LIST m_nvenc;
+    std::vector<NvEncInputFrame> m_vInputFrames;
+    std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResources;
+    std::vector<NvEncInputFrame> m_vReferenceFrames;
+    std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResourcesForReference;
+    std::vector<NV_ENC_INPUT_PTR> m_vMappedInputBuffers;
+    std::vector<NV_ENC_INPUT_PTR> m_vMappedRefBuffers;
+    std::vector<void*> m_vpCompletionEvent;
+
+    int32_t m_iToSend = 0;
+    int32_t m_iGot = 0;
+    int32_t m_nEncoderBuffer = 0;
+    int32_t m_nOutputDelay = 0;
+
+private:
+    void* m_pDevice;
+    NV_ENC_DEVICE_TYPE m_eDeviceType;
+    uint32_t m_nWidth;
+    uint32_t m_nHeight;
+    uint32_t m_nMaxEncodeWidth = 0;
+    uint32_t m_nMaxEncodeHeight = 0;
+    NV_ENC_BUFFER_FORMAT m_eBufferFormat;
+    NV_ENC_INITIALIZE_PARAMS m_initializeParams = {};
+    NV_ENC_CONFIG m_encodeConfig = {};
+    bool m_bEncoderInitialized = false;
+    uint32_t m_nExtraOutputDelay = 3; // To ensure encode and graphics can work in parallel, m_nExtraOutputDelay should be set to at least 1
+    std::vector<NV_ENC_OUTPUT_PTR> m_vBitstreamOutputBuffer;
+};
+}}
+#endif
\ No newline at end of file
diff --git a/modules/cudacodec/src/NvEncoderCuda.cpp b/modules/cudacodec/src/NvEncoderCuda.cpp
new file mode 100644
index 00000000000..9aae90c2729
--- /dev/null
+++ b/modules/cudacodec/src/NvEncoderCuda.cpp
@@ -0,0 +1,196 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+#include "precomp.hpp"
+
+#if defined(HAVE_NVCUVENC)
+#include "NvEncoderCuda.h"
+
+namespace cv { namespace cudacodec {
+NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
+    uint32_t nExtraOutputDelay) :
+    NvEncoder(NV_ENC_DEVICE_TYPE_CUDA, cuContext, nWidth, nHeight, eBufferFormat, nExtraOutputDelay),
+    m_cuContext(cuContext)
+{
+    if (!m_hEncoder)
+    {
+        NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_INVALID_DEVICE);
+    }
+
+    if (!m_cuContext)
+    {
+        NVENC_THROW_ERROR("Invalid Cuda Context", NV_ENC_ERR_INVALID_DEVICE);
+    }
+}
+
+NvEncoderCuda::~NvEncoderCuda()
+{
+    ReleaseCudaResources();
+}
+
+void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers)
+{
+    if (!IsHWEncoderInitialized())
+    {
+        NVENC_THROW_ERROR("Encoder initialization failed", NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
+    }
+
+    cuSafeCall(cuCtxPushCurrent(m_cuContext));
+    std::vector<void*> inputFrames;
+    for (int i = 0; i < numInputBuffers; i++)
+    {
+        CUdeviceptr pDeviceFrame;
+        uint32_t chromaHeight = GetNumChromaPlanes(GetPixelFormat()) * GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
+        if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 || GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV)
+            chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
+        cuSafeCall(cuMemAllocPitch((CUdeviceptr*)&pDeviceFrame,
+            &m_cudaPitch,
+            GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()),
+            GetMaxEncodeHeight() + chromaHeight, 16));
+        inputFrames.push_back((void*)pDeviceFrame);
+    }
+    cuSafeCall(cuCtxPopCurrent(NULL));
+
+    RegisterInputResources(inputFrames,
+        NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
+        GetMaxEncodeWidth(),
+        GetMaxEncodeHeight(),
+        (int)m_cudaPitch,
+        GetPixelFormat(),
+        false);
+}
+
+void
NvEncoderCuda::SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream) +{ + NVENC_API_CALL(m_nvenc.nvEncSetIOCudaStreams(m_hEncoder, inputStream, outputStream)); +} + +void NvEncoderCuda::ReleaseInputBuffers() +{ + ReleaseCudaResources(); +} + +void NvEncoderCuda::ReleaseCudaResources() +{ + if (!m_hEncoder) + { + return; + } + + if (!m_cuContext) + { + return; + } + + UnregisterInputResources(); + + cuCtxPushCurrent(m_cuContext); + + for (uint32_t i = 0; i < m_vInputFrames.size(); ++i) + { + if (m_vInputFrames[i].inputPtr) + { + cuMemFree(reinterpret_cast(m_vInputFrames[i].inputPtr)); + } + } + m_vInputFrames.clear(); + + for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i) + { + if (m_vReferenceFrames[i].inputPtr) + { + cuMemFree(reinterpret_cast(m_vReferenceFrames[i].inputPtr)); + } + } + m_vReferenceFrames.clear(); + + cuCtxPopCurrent(NULL); + m_cuContext = nullptr; +} + +void NvEncoderCuda::CopyToDeviceFrame(CUcontext device, + void* pSrcFrame, + uint32_t nSrcPitch, + CUdeviceptr pDstFrame, + uint32_t dstPitch, + int width, + int height, + CUmemorytype srcMemoryType, + NV_ENC_BUFFER_FORMAT pixelFormat, + const uint32_t dstChromaOffsets[], + uint32_t numChromaPlanes, + bool bUnAlignedDeviceCopy, + CUstream stream) +{ + if (srcMemoryType != CU_MEMORYTYPE_HOST && srcMemoryType != CU_MEMORYTYPE_DEVICE) + { + NVENC_THROW_ERROR("Invalid source memory type for copy", NV_ENC_ERR_INVALID_PARAM); + } + + cuSafeCall(cuCtxPushCurrent(device)); + + uint32_t srcPitch = nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width); + CUDA_MEMCPY2D m = {}; + m.srcMemoryType = srcMemoryType; + if (srcMemoryType == CU_MEMORYTYPE_HOST) + { + m.srcHost = pSrcFrame; + } + else + { + m.srcDevice = (CUdeviceptr)pSrcFrame; + } + m.srcPitch = srcPitch; + m.dstMemoryType = CU_MEMORYTYPE_DEVICE; + m.dstDevice = pDstFrame; + m.dstPitch = dstPitch; + m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width); + m.Height = height; + if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) + { + cuSafeCall(cuMemcpy2DUnaligned(&m)); + } + else + { + cuSafeCall(stream == NULL ? cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream)); + } + + std::vector srcChromaOffsets; + NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height, srcChromaOffsets); + uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height); + uint32_t destChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, dstPitch); + uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch); + uint32_t chromaWidthInBytes = NvEncoder::GetChromaWidthInBytes(pixelFormat, width); + + for (uint32_t i = 0; i < numChromaPlanes; ++i) + { + if (chromaHeight) + { + if (srcMemoryType == CU_MEMORYTYPE_HOST) + { + m.srcHost = ((uint8_t*)pSrcFrame + srcChromaOffsets[i]); + } + else + { + m.srcDevice = (CUdeviceptr)((uint8_t*)pSrcFrame + srcChromaOffsets[i]); + } + m.srcPitch = srcChromaPitch; + + m.dstDevice = (CUdeviceptr)((uint8_t*)pDstFrame + dstChromaOffsets[i]); + m.dstPitch = destChromaPitch; + m.WidthInBytes = chromaWidthInBytes; + m.Height = chromaHeight; + if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) + { + cuSafeCall(cuMemcpy2DUnaligned(&m)); + } + else + { + cuSafeCall(stream == NULL ? 
cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream)); + } + } + } + cuSafeCall(cuCtxPopCurrent(NULL)); +} +}} +#endif \ No newline at end of file diff --git a/modules/cudacodec/src/NvEncoderCuda.h b/modules/cudacodec/src/NvEncoderCuda.h new file mode 100644 index 00000000000..55788dc7f96 --- /dev/null +++ b/modules/cudacodec/src/NvEncoderCuda.h @@ -0,0 +1,75 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_NVENCODERCUDA_HPP +#define OPENCV_NVENCODERCUDA_HPP +#include +#include +#include +#include +#include "NvEncoder.h" + +namespace cv { namespace cudacodec { + +/** +* @brief Encoder for CUDA device memory. +*/ +class NvEncoderCuda : public NvEncoder +{ +public: + NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat, + uint32_t nExtraOutputDelay = 3); + virtual ~NvEncoderCuda(); + + /** + * @brief This is a static function to copy input data from host memory to device memory. + * This function assumes YUV plane is a single contiguous memory segment. + */ + static void CopyToDeviceFrame(CUcontext device, + void* pSrcFrame, + uint32_t nSrcPitch, + CUdeviceptr pDstFrame, + uint32_t dstPitch, + int width, + int height, + CUmemorytype srcMemoryType, + NV_ENC_BUFFER_FORMAT pixelFormat, + const uint32_t dstChromaOffsets[], + uint32_t numChromaPlanes, + bool bUnAlignedDeviceCopy = false, + CUstream stream = NULL); + + /** + * @brief This function sets input and output CUDA streams + */ + void SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream); + +protected: + /** + * @brief This function is used to release the input buffers allocated for encoding. + * This function is an override of virtual function NvEncoder::ReleaseInputBuffers(). + */ + virtual void ReleaseInputBuffers() override; + +private: + /** + * @brief This function is used to allocate input buffers for encoding. + * This function is an override of virtual function NvEncoder::AllocateInputBuffers(). + */ + virtual void AllocateInputBuffers(int32_t numInputBuffers) override; + +private: + /** + * @brief This is a private function to release CUDA device memory used for encoding. 
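+ * It unregisters the input resources from NvEncodeAPI and then frees the pitched
+ * allocations made in AllocateInputBuffers(); it is called from both the destructor
+ * and ReleaseInputBuffers(). A condensed, illustrative sketch of the cleanup it
+ * performs (see ReleaseCudaResources() in NvEncoderCuda.cpp for the full version):
+ * @code
+ * UnregisterInputResources();
+ * cuCtxPushCurrent(m_cuContext);
+ * for (auto& frame : m_vInputFrames)
+ *     cuMemFree(reinterpret_cast<CUdeviceptr>(frame.inputPtr));
+ * cuCtxPopCurrent(NULL);
+ * m_cuContext = nullptr;
+ * @endcode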
+ */ + void ReleaseCudaResources(); + +protected: + CUcontext m_cuContext; + +private: + size_t m_cudaPitch = 0; +}; +}} +#endif \ No newline at end of file diff --git a/modules/cudacodec/src/precomp.hpp b/modules/cudacodec/src/precomp.hpp index ca3c68958da..99a788a0128 100644 --- a/modules/cudacodec/src/precomp.hpp +++ b/modules/cudacodec/src/precomp.hpp @@ -57,32 +57,38 @@ #include "opencv2/core/private.cuda.hpp" #include -#ifdef HAVE_NVCUVID - #if defined(HAVE_DYNLINK_NVCUVID_HEADER) - #include - #elif defined(HAVE_NVCUVID_HEADER) - #include - #endif - - #ifdef _WIN32 +#if defined(HAVE_NVCUVID) || defined(HAVE_NVCUVENC) + #if _WIN32 #define NOMINMAX - #include - #ifdef HAVE_NVCUVENC - #include - #endif - #else - #include - #include #endif + #if defined(HAVE_NVCUVID) + #if defined(HAVE_DYNLINK_NVCUVID_HEADER) + #include + #elif defined(HAVE_NVCUVID_HEADER) + #include + #endif - #include "thread.hpp" - #include "video_source.hpp" - #include "ffmpeg_video_source.hpp" - #include "cuvid_video_source.hpp" - #include "frame_queue.hpp" - #include "video_decoder.hpp" - #include "video_parser.hpp" + #ifdef _WIN32 + #include + #else + #include + #include + #endif + #include "thread.hpp" + #include "video_source.hpp" + #include "ffmpeg_video_source.hpp" + #include "cuvid_video_source.hpp" + #include "frame_queue.hpp" + #include "video_decoder.hpp" + #include "video_parser.hpp" + #endif + #if defined(HAVE_NVCUVENC) + #include + #include + #include "NvEncoderCuda.h" + #include + #endif #endif #endif /* OPENCV_PRECOMP_H */ diff --git a/modules/cudacodec/src/video_reader.cpp b/modules/cudacodec/src/video_reader.cpp index c484f23b23a..a566bd4de71 100644 --- a/modules/cudacodec/src/video_reader.cpp +++ b/modules/cudacodec/src/video_reader.cpp @@ -54,6 +54,7 @@ Ptr cv::cudacodec::createVideoReader(const Ptr&, co #else // HAVE_NVCUVID void nv12ToBgra(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, cudaStream_t stream); +bool ValidColorFormat(const ColorFormat colorFormat); void videoDecPostProcessFrame(const GpuMat& decodedFrame, GpuMat& outFrame, int width, int height, const ColorFormat colorFormat, Stream stream) @@ -74,7 +75,7 @@ void videoDecPostProcessFrame(const GpuMat& decodedFrame, GpuMat& outFrame, int outFrame.create(height, width, CV_8UC1); cudaMemcpy2DAsync(outFrame.ptr(), outFrame.step, decodedFrame.ptr(), decodedFrame.step, width, height, cudaMemcpyDeviceToDevice, StreamAccessor::getStream(stream)); } - else if (colorFormat == ColorFormat::YUV) { + else if (colorFormat == ColorFormat::NV_NV12) { decodedFrame.copyTo(outFrame, stream); } } @@ -100,7 +101,7 @@ namespace bool set(const VideoReaderProps propertyId, const double propertyVal) CV_OVERRIDE; - void set(const ColorFormat _colorFormat) CV_OVERRIDE; + bool set(const ColorFormat colorFormat_) CV_OVERRIDE; bool get(const VideoReaderProps propertyId, double& propertyVal) const CV_OVERRIDE; bool getVideoReaderProps(const VideoReaderProps propertyId, double& propertyValOut, double propertyValIn) const CV_OVERRIDE; @@ -273,8 +274,16 @@ namespace return false; } - void VideoReaderImpl::set(const ColorFormat _colorFormat) { - colorFormat = _colorFormat; + bool ValidColorFormat(const ColorFormat colorFormat) { + if (colorFormat == ColorFormat::BGRA || colorFormat == ColorFormat::BGR || colorFormat == ColorFormat::GRAY || colorFormat == ColorFormat::NV_NV12) + return true; + return false; + } + + bool VideoReaderImpl::set(const ColorFormat colorFormat_) { + if (!ValidColorFormat(colorFormat_)) return false; + colorFormat = 
colorFormat_; + return true; } bool VideoReaderImpl::get(const VideoReaderProps propertyId, double& propertyVal) const { diff --git a/modules/cudacodec/src/video_writer.cpp b/modules/cudacodec/src/video_writer.cpp index ce3b68fb2a8..cd184580ef3 100644 --- a/modules/cudacodec/src/video_writer.cpp +++ b/modules/cudacodec/src/video_writer.cpp @@ -43,874 +43,351 @@ #include "precomp.hpp" -using namespace cv; +namespace cv { namespace cudacodec { using namespace cv::cuda; -using namespace cv::cudacodec; -#if !defined(HAVE_NVCUVENC) || !defined(_WIN32) +#if !defined(HAVE_NVCUVENC) -cv::cudacodec::EncoderParams::EncoderParams() { throw_no_cuda(); } -cv::cudacodec::EncoderParams::EncoderParams(const String&) { throw_no_cuda(); } -void cv::cudacodec::EncoderParams::load(const String&) { throw_no_cuda(); } -void cv::cudacodec::EncoderParams::save(const String&) const { throw_no_cuda(); } +Ptr createVideoWriter(const String&, const Size, const Codec, const double, const ColorFormat, const Ptr, const cv::cuda::Stream&) { throw_no_cuda(); return Ptr(); } +Ptr createVideoWriter(const String&, const Size, const Codec, const double, const ColorFormat, const EncoderParams&, const Ptr, const cv::cuda::Stream&) { throw_no_cuda(); return Ptr(); } -Ptr cv::cudacodec::createVideoWriter(const String&, Size, double, SurfaceFormat) { throw_no_cuda(); return Ptr(); } -Ptr cv::cudacodec::createVideoWriter(const String&, Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); return Ptr(); } +#else // !defined HAVE_NVCUVENC -Ptr cv::cudacodec::createVideoWriter(const Ptr&, Size, double, SurfaceFormat) { throw_no_cuda(); return Ptr(); } -Ptr cv::cudacodec::createVideoWriter(const Ptr&, Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); return Ptr(); } +NV_ENC_BUFFER_FORMAT EncBufferFormat(const ColorFormat colorFormat); +int NChannels(const ColorFormat colorFormat); +GUID CodecGuid(const Codec codec); +void FrameRate(const double fps, uint32_t& frameRateNum, uint32_t& frameRateDen); +GUID EncodingProfileGuid(const EncodeProfile encodingProfile); +GUID EncodingPresetGuid(const EncodePreset nvPreset); +bool Equal(const GUID& g1, const GUID& g2); -#else // !defined HAVE_NVCUVENC || !defined _WIN32 - -void RGB_to_YV12(const GpuMat& src, GpuMat& dst); - -/////////////////////////////////////////////////////////////////////////// -// VideoWriterImpl - -namespace +EncoderParams::EncoderParams() : nvPreset(ENC_PRESET_P3), tuningInfo(ENC_TUNING_INFO_HIGH_QUALITY), encodingProfile(ENC_CODEC_PROFILE_AUTOSELECT), + rateControlMode(ENC_PARAMS_RC_VBR), multiPassEncoding(ENC_MULTI_PASS_DISABLED), constQp({ 0,0,0 }), averageBitRate(0), maxBitRate(0), + targetQuality(30), gopLength(0) { - class NVEncoderWrapper - { - public: - NVEncoderWrapper() : encoder_(0) - { - int err; - - err = NVGetHWEncodeCaps(); - if (err) - CV_Error(Error::GpuNotSupported, "No CUDA capability present"); - - // Create the Encoder API Interface - err = NVCreateEncoder(&encoder_); - CV_Assert( err == 0 ); - } - - ~NVEncoderWrapper() - { - if (encoder_) - NVDestroyEncoder(encoder_); - } - - operator NVEncoder() const - { - return encoder_; - } - - private: - NVEncoder encoder_; - }; - - enum CodecType - { - MPEG1, // not supported yet - MPEG2, // not supported yet - MPEG4, // not supported yet - H264 - }; - - class VideoWriterImpl : public VideoWriter - { - public: - VideoWriterImpl(const Ptr& callback, Size frameSize, double fps, SurfaceFormat format, CodecType codec = H264); - VideoWriterImpl(const Ptr& callback, Size 
frameSize, double fps, const EncoderParams& params, SurfaceFormat format, CodecType codec = H264); - - void write(InputArray frame, bool lastFrame = false); - - EncoderParams getEncoderParams() const; - - private: - void initEncoder(double fps); - void setEncodeParams(const EncoderParams& params); - void initGpuMemory(); - void initCallBacks(); - void createHWEncoder(); - - Ptr callback_; - Size frameSize_; - - CodecType codec_; - SurfaceFormat inputFormat_; - NVVE_SurfaceFormat surfaceFormat_; - - NVEncoderWrapper encoder_; - - GpuMat videoFrame_; - CUvideoctxlock cuCtxLock_; +}; - // CallBacks - - static unsigned char* NVENCAPI HandleAcquireBitStream(int* pBufferSize, void* pUserdata); - static void NVENCAPI HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata); - static void NVENCAPI HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata); - static void NVENCAPI HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata); - }; +bool operator==(const EncoderParams& lhs, const EncoderParams& rhs) +{ + return std::tie(lhs.nvPreset, lhs.tuningInfo, lhs.encodingProfile, lhs.rateControlMode, lhs.multiPassEncoding, lhs.constQp.qpInterB, lhs.constQp.qpInterP, lhs.constQp.qpIntra, + lhs.averageBitRate, lhs.maxBitRate, lhs.targetQuality, lhs.gopLength) == std::tie(rhs.nvPreset, rhs.tuningInfo, rhs.encodingProfile, rhs.rateControlMode, rhs.multiPassEncoding, rhs.constQp.qpInterB, rhs.constQp.qpInterP, rhs.constQp.qpIntra, + rhs.averageBitRate, rhs.maxBitRate, rhs.targetQuality, rhs.gopLength); +}; - VideoWriterImpl::VideoWriterImpl(const Ptr& callback, Size frameSize, double fps, SurfaceFormat format, CodecType codec) : - callback_(callback), - frameSize_(frameSize), - codec_(codec), - inputFormat_(format), - cuCtxLock_(0) - { - surfaceFormat_ = (inputFormat_ == SF_BGR ? 
YV12 : static_cast(inputFormat_)); +class RawVideoWriter : public EncoderCallback +{ +public: + RawVideoWriter(String fileName); + ~RawVideoWriter(); + void onEncoded(std::vector> vPacket); + void onEncodingFinished(); +private: + std::ofstream fpOut; +}; + +RawVideoWriter::RawVideoWriter(String fileName) { + fpOut = std::ofstream(fileName, std::ios::out | std::ios::binary); + if (!fpOut) + CV_Error(Error::StsError, "Failed to open video file " + fileName + " for writing!"); +} - initEncoder(fps); +void RawVideoWriter::onEncodingFinished() { + fpOut.close(); +} - initGpuMemory(); +RawVideoWriter::~RawVideoWriter() { + onEncodingFinished(); +} - initCallBacks(); +void RawVideoWriter::onEncoded(std::vector> vPacket) { + for (auto& packet : vPacket) + fpOut.write(reinterpret_cast(packet.data()), packet.size()); +} - createHWEncoder(); +class VideoWriterImpl : public VideoWriter +{ +public: + VideoWriterImpl(const Ptr& videoWriter, const Size frameSize, const Codec codec, const double fps, + const ColorFormat colorFormat, const Stream& stream = Stream::Null()); + VideoWriterImpl(const Ptr& videoWriter, const Size frameSize, const Codec codec, const double fps, + const ColorFormat colorFormat, const EncoderParams& encoderParams, const Stream& stream = Stream::Null()); + ~VideoWriterImpl(); + void write(InputArray frame); + EncoderParams getEncoderParams() const; + void release(); +private: + void Init(const Codec codec, const double fps, const Size frameSz); + void InitializeEncoder(const GUID codec, const double fps); + void CopyToNvSurface(const InputArray src); + + Ptr encoderCallback; + ColorFormat colorFormat = ColorFormat::UNDEFINED; + NV_ENC_BUFFER_FORMAT surfaceFormat = NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_UNDEFINED; + EncoderParams encoderParams; + Stream stream = Stream::Null(); + Ptr pEnc; + std::vector> vPacket; + int nSrcChannels = 0; + CUcontext cuContext; +}; + +NV_ENC_BUFFER_FORMAT EncBufferFormat(const ColorFormat colorFormat) { + switch (colorFormat) { + case ColorFormat::BGR: return NV_ENC_BUFFER_FORMAT_ARGB; + case ColorFormat::RGB: return NV_ENC_BUFFER_FORMAT_ABGR; + case ColorFormat::BGRA: return NV_ENC_BUFFER_FORMAT_ARGB; + case ColorFormat::RGBA: return NV_ENC_BUFFER_FORMAT_ABGR; + case ColorFormat::GRAY: + case ColorFormat::NV_NV12: return NV_ENC_BUFFER_FORMAT_NV12; + case ColorFormat::NV_YV12: return NV_ENC_BUFFER_FORMAT_YV12; + case ColorFormat::NV_IYUV: return NV_ENC_BUFFER_FORMAT_IYUV; + case ColorFormat::NV_YUV444: return NV_ENC_BUFFER_FORMAT_YUV444; + case ColorFormat::NV_AYUV: return NV_ENC_BUFFER_FORMAT_AYUV; + default: return NV_ENC_BUFFER_FORMAT_UNDEFINED; } +} - VideoWriterImpl::VideoWriterImpl(const Ptr& callback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format, CodecType codec) : - callback_(callback), - frameSize_(frameSize), - codec_(codec), - inputFormat_(format), - cuCtxLock_(0) - { - surfaceFormat_ = (inputFormat_ == SF_BGR ? 
YV12 : static_cast(inputFormat_)); - - initEncoder(fps); - - setEncodeParams(params); - - initGpuMemory(); - - initCallBacks(); - - createHWEncoder(); +int NChannels(const ColorFormat colorFormat) { + switch (colorFormat) { + case ColorFormat::BGR: + case ColorFormat::RGB: + case ColorFormat::NV_IYUV: + case ColorFormat::NV_YUV444: return 3; + case ColorFormat::RGBA: + case ColorFormat::BGRA: + case ColorFormat::NV_AYUV: return 4; + case ColorFormat::GRAY: + case ColorFormat::NV_NV12: + case ColorFormat::NV_YV12: return 1; + default: return 0; } +} - void VideoWriterImpl::initEncoder(double fps) - { - int err; - - // Set codec - - static const unsigned long codecs_id[] = - { - NV_CODEC_TYPE_MPEG1, NV_CODEC_TYPE_MPEG2, NV_CODEC_TYPE_MPEG4, NV_CODEC_TYPE_H264, NV_CODEC_TYPE_VC1 - }; - err = NVSetCodec(encoder_, codecs_id[codec_]); - if (err) - CV_Error(Error::StsNotImplemented, "Codec format is not supported"); - - // Set default params - - err = NVSetDefaultParam(encoder_); - CV_Assert( err == 0 ); - - // Set some common params - - int inputSize[] = { frameSize_.width, frameSize_.height }; - err = NVSetParamValue(encoder_, NVVE_IN_SIZE, &inputSize); - CV_Assert( err == 0 ); - err = NVSetParamValue(encoder_, NVVE_OUT_SIZE, &inputSize); - CV_Assert( err == 0 ); - - int aspectRatio[] = { frameSize_.width, frameSize_.height, ASPECT_RATIO_DAR }; - err = NVSetParamValue(encoder_, NVVE_ASPECT_RATIO, &aspectRatio); - CV_Assert( err == 0 ); - - // FPS - - int frame_rate = static_cast(fps + 0.5); - int frame_rate_base = 1; - while (fabs(static_cast(frame_rate) / frame_rate_base) - fps > 0.001) - { - frame_rate_base *= 10; - frame_rate = static_cast(fps*frame_rate_base + 0.5); - } - int FrameRate[] = { frame_rate, frame_rate_base }; - err = NVSetParamValue(encoder_, NVVE_FRAME_RATE, &FrameRate); - CV_Assert( err == 0 ); - - // Select device for encoding - - int gpuID = getDevice(); - err = NVSetParamValue(encoder_, NVVE_FORCE_GPU_SELECTION, &gpuID); - CV_Assert( err == 0 ); +VideoWriterImpl::VideoWriterImpl(const Ptr& encoderCallBack_, const Size frameSz, const Codec codec, const double fps, + const ColorFormat colorFormat_, const EncoderParams& encoderParams_, const Stream& stream_) : + encoderCallback(encoderCallBack_), colorFormat(colorFormat_), encoderParams(encoderParams_), stream(stream_) +{ + CV_Assert(colorFormat != ColorFormat::UNDEFINED); + surfaceFormat = EncBufferFormat(colorFormat); + if (surfaceFormat == NV_ENC_BUFFER_FORMAT_UNDEFINED) { + String msg = cv::format("Unsupported input surface format: %i", colorFormat); + CV_LOG_WARNING(NULL, msg); + CV_Error(Error::StsUnsupportedFormat, msg); } + nSrcChannels = NChannels(colorFormat); + Init(codec, fps, frameSz); +} - void VideoWriterImpl::setEncodeParams(const EncoderParams& params) - { - int err; - - int P_Interval = params.P_Interval; - err = NVSetParamValue(encoder_, NVVE_P_INTERVAL, &P_Interval); - CV_Assert( err == 0 ); - - int IDR_Period = params.IDR_Period; - err = NVSetParamValue(encoder_, NVVE_IDR_PERIOD, &IDR_Period); - CV_Assert( err == 0 ); - - int DynamicGOP = params.DynamicGOP; - err = NVSetParamValue(encoder_, NVVE_DYNAMIC_GOP, &DynamicGOP); - CV_Assert( err == 0 ); - - NVVE_RateCtrlType RCType = static_cast(params.RCType); - err = NVSetParamValue(encoder_, NVVE_RC_TYPE, &RCType); - CV_Assert( err == 0 ); - - int AvgBitrate = params.AvgBitrate; - err = NVSetParamValue(encoder_, NVVE_AVG_BITRATE, &AvgBitrate); - CV_Assert( err == 0 ); - - int PeakBitrate = params.PeakBitrate; - err = NVSetParamValue(encoder_, 
NVVE_PEAK_BITRATE, &PeakBitrate); - CV_Assert( err == 0 ); - - int QP_Level_Intra = params.QP_Level_Intra; - err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTRA, &QP_Level_Intra); - CV_Assert( err == 0 ); - - int QP_Level_InterP = params.QP_Level_InterP; - err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_P, &QP_Level_InterP); - CV_Assert( err == 0 ); - - int QP_Level_InterB = params.QP_Level_InterB; - err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_B, &QP_Level_InterB); - CV_Assert( err == 0 ); - - int DeblockMode = params.DeblockMode; - err = NVSetParamValue(encoder_, NVVE_DEBLOCK_MODE, &DeblockMode); - CV_Assert( err == 0 ); - - int ProfileLevel = params.ProfileLevel; - err = NVSetParamValue(encoder_, NVVE_PROFILE_LEVEL, &ProfileLevel); - CV_Assert( err == 0 ); - - int ForceIntra = params.ForceIntra; - err = NVSetParamValue(encoder_, NVVE_FORCE_INTRA, &ForceIntra); - CV_Assert( err == 0 ); - - int ForceIDR = params.ForceIDR; - err = NVSetParamValue(encoder_, NVVE_FORCE_IDR, &ForceIDR); - CV_Assert( err == 0 ); - - int ClearStat = params.ClearStat; - err = NVSetParamValue(encoder_, NVVE_CLEAR_STAT, &ClearStat); - CV_Assert( err == 0 ); - - NVVE_DI_MODE DIMode = static_cast(params.DIMode); - err = NVSetParamValue(encoder_, NVVE_SET_DEINTERLACE, &DIMode); - CV_Assert( err == 0 ); - - if (params.Presets != -1) - { - NVVE_PRESETS_TARGET Presets = static_cast(params.Presets); - err = NVSetParamValue(encoder_, NVVE_PRESETS, &Presets); - CV_Assert( err == 0 ); - } +VideoWriterImpl::VideoWriterImpl(const Ptr& encoderCallback, const Size frameSz, const Codec codec, const double fps, + const ColorFormat colorFormat, const Stream& stream) : + VideoWriterImpl(encoderCallback, frameSz, codec, fps, colorFormat, EncoderParams(), stream) +{ +} - int DisableCabac = params.DisableCabac; - err = NVSetParamValue(encoder_, NVVE_DISABLE_CABAC, &DisableCabac); - CV_Assert( err == 0 ); +void VideoWriterImpl::release() { + pEnc->EndEncode(vPacket); + encoderCallback->onEncoded(vPacket); + encoderCallback->onEncodingFinished(); +} - int NaluFramingType = params.NaluFramingType; - err = NVSetParamValue(encoder_, NVVE_CONFIGURE_NALU_FRAMING_TYPE, &NaluFramingType); - CV_Assert( err == 0 ); +VideoWriterImpl::~VideoWriterImpl() { + release(); +} - int DisableSPSPPS = params.DisableSPSPPS; - err = NVSetParamValue(encoder_, NVVE_DISABLE_SPS_PPS, &DisableSPSPPS); - CV_Assert( err == 0 ); +GUID CodecGuid(const Codec codec) { + switch (codec) { + case Codec::H264: return NV_ENC_CODEC_H264_GUID; + case Codec::HEVC: return NV_ENC_CODEC_HEVC_GUID; + default: break; } + std::string msg = "Unknown codec: cudacodec::VideoWriter only supports CODEC_VW::H264 and CODEC_VW::HEVC"; + CV_LOG_WARNING(NULL, msg); + CV_Error(Error::StsUnsupportedFormat, msg); +} - EncoderParams VideoWriterImpl::getEncoderParams() const - { - int err; - - EncoderParams params; - - int P_Interval; - err = NVGetParamValue(encoder_, NVVE_P_INTERVAL, &P_Interval); - CV_Assert( err == 0 ); - params.P_Interval = P_Interval; - - int IDR_Period; - err = NVGetParamValue(encoder_, NVVE_IDR_PERIOD, &IDR_Period); - CV_Assert( err == 0 ); - params.IDR_Period = IDR_Period; - - int DynamicGOP; - err = NVGetParamValue(encoder_, NVVE_DYNAMIC_GOP, &DynamicGOP); - CV_Assert( err == 0 ); - params.DynamicGOP = DynamicGOP; - - NVVE_RateCtrlType RCType; - err = NVGetParamValue(encoder_, NVVE_RC_TYPE, &RCType); - CV_Assert( err == 0 ); - params.RCType = RCType; - - int AvgBitrate; - err = NVGetParamValue(encoder_, NVVE_AVG_BITRATE, &AvgBitrate); - CV_Assert( err == 0 ); - 
params.AvgBitrate = AvgBitrate; - - int PeakBitrate; - err = NVGetParamValue(encoder_, NVVE_PEAK_BITRATE, &PeakBitrate); - CV_Assert( err == 0 ); - params.PeakBitrate = PeakBitrate; - - int QP_Level_Intra; - err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTRA, &QP_Level_Intra); - CV_Assert( err == 0 ); - params.QP_Level_Intra = QP_Level_Intra; - - int QP_Level_InterP; - err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTER_P, &QP_Level_InterP); - CV_Assert( err == 0 ); - params.QP_Level_InterP = QP_Level_InterP; - - int QP_Level_InterB; - err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTER_B, &QP_Level_InterB); - CV_Assert( err == 0 ); - params.QP_Level_InterB = QP_Level_InterB; - - int DeblockMode; - err = NVGetParamValue(encoder_, NVVE_DEBLOCK_MODE, &DeblockMode); - CV_Assert( err == 0 ); - params.DeblockMode = DeblockMode; - - int ProfileLevel; - err = NVGetParamValue(encoder_, NVVE_PROFILE_LEVEL, &ProfileLevel); - CV_Assert( err == 0 ); - params.ProfileLevel = ProfileLevel; - - int ForceIntra; - err = NVGetParamValue(encoder_, NVVE_FORCE_INTRA, &ForceIntra); - CV_Assert( err == 0 ); - params.ForceIntra = ForceIntra; - - int ForceIDR; - err = NVGetParamValue(encoder_, NVVE_FORCE_IDR, &ForceIDR); - CV_Assert( err == 0 ); - params.ForceIDR = ForceIDR; - - int ClearStat; - err = NVGetParamValue(encoder_, NVVE_CLEAR_STAT, &ClearStat); - CV_Assert( err == 0 ); - params.ClearStat = ClearStat; - - NVVE_DI_MODE DIMode; - err = NVGetParamValue(encoder_, NVVE_SET_DEINTERLACE, &DIMode); - CV_Assert( err == 0 ); - params.DIMode = DIMode; - - params.Presets = -1; - - int DisableCabac; - err = NVGetParamValue(encoder_, NVVE_DISABLE_CABAC, &DisableCabac); - CV_Assert( err == 0 ); - params.DisableCabac = DisableCabac; - - int NaluFramingType; - err = NVGetParamValue(encoder_, NVVE_CONFIGURE_NALU_FRAMING_TYPE, &NaluFramingType); - CV_Assert( err == 0 ); - params.NaluFramingType = NaluFramingType; - - int DisableSPSPPS; - err = NVGetParamValue(encoder_, NVVE_DISABLE_SPS_PPS, &DisableSPSPPS); - CV_Assert( err == 0 ); - params.DisableSPSPPS = DisableSPSPPS; - - return params; +void VideoWriterImpl::Init(const Codec codec, const double fps, const Size frameSz) { + // init context + GpuMat temp(1, 1, CV_8UC1); + temp.release(); + cuSafeCall(cuCtxGetCurrent(&cuContext)); + CV_Assert(nSrcChannels != 0); + const GUID codecGuid = CodecGuid(codec); + try { + pEnc = new NvEncoderCuda(cuContext, frameSz.width, frameSz.height, surfaceFormat); + InitializeEncoder(codecGuid, fps); + const cudaStream_t cudaStream = cuda::StreamAccessor::getStream(stream); + pEnc->SetIOCudaStreams((NV_ENC_CUSTREAM_PTR)&cudaStream, (NV_ENC_CUSTREAM_PTR)&cudaStream); } - - void VideoWriterImpl::initGpuMemory() + catch (cv::Exception& e) { - int err; - - // initialize context - GpuMat temp(1, 1, CV_8U); - temp.release(); - - static const int bpp[] = - { - 16, // UYVY, 4:2:2 - 16, // YUY2, 4:2:2 - 12, // YV12, 4:2:0 - 12, // NV12, 4:2:0 - 12, // IYUV, 4:2:0 - }; - - CUcontext cuContext; - cuSafeCall( cuCtxGetCurrent(&cuContext) ); - - // Allocate the CUDA memory Pitched Surface - if (surfaceFormat_ == UYVY || surfaceFormat_ == YUY2) - videoFrame_.create(frameSize_.height, (frameSize_.width * bpp[surfaceFormat_]) / 8, CV_8UC1); - else - videoFrame_.create((frameSize_.height * bpp[surfaceFormat_]) / 8, frameSize_.width, CV_8UC1); - - // Create the Video Context Lock (used for synchronization) - cuSafeCall( cuvidCtxLockCreate(&cuCtxLock_, cuContext) ); - - // If we are using GPU Device Memory with NVCUVENC, it is necessary to create a - // CUDA 
Context with a Context Lock cuvidCtxLock. The Context Lock needs to be passed to NVCUVENC - - int iUseDeviceMem = 1; - err = NVSetParamValue(encoder_, NVVE_DEVICE_MEMORY_INPUT, &iUseDeviceMem); - CV_Assert( err == 0 ); - - err = NVSetParamValue(encoder_, NVVE_DEVICE_CTX_LOCK, &cuCtxLock_); - CV_Assert( err == 0 ); + String msg = String("Error initializing Nvidia Encoder. Refer to Nvidia's GPU Support Matrix to confirm your GPU supports hardware encoding, ") + + String("codec and surface format and check the encoder documentation to verify your choice of encoding paramaters are supported.") + + e.msg; + CV_Error(Error::GpuApiCallError, msg); } + const Size encoderFrameSz(pEnc->GetEncodeWidth(), pEnc->GetEncodeHeight()); + CV_Assert(frameSz == encoderFrameSz); +} - void VideoWriterImpl::initCallBacks() - { - NVVE_CallbackParams cb; - memset(&cb, 0, sizeof(NVVE_CallbackParams)); - - cb.pfnacquirebitstream = HandleAcquireBitStream; - cb.pfnonbeginframe = HandleOnBeginFrame; - cb.pfnonendframe = HandleOnEndFrame; - cb.pfnreleasebitstream = HandleReleaseBitStream; - - NVRegisterCB(encoder_, cb, this); +void FrameRate(const double fps, uint32_t& frameRateNum, uint32_t& frameRateDen) { + CV_Assert(fps >= 0); + int frame_rate = (int)(fps + 0.5); + int frame_rate_base = 1; + while (fabs(((double)frame_rate / frame_rate_base) - fps) > 0.001) { + frame_rate_base *= 10; + frame_rate = (int)(fps * frame_rate_base + 0.5); } + frameRateNum = frame_rate; + frameRateDen = frame_rate_base; +} - void VideoWriterImpl::createHWEncoder() - { - int err; - - // Create the NVIDIA HW resources for Encoding on NVIDIA hardware - err = NVCreateHWEncoder(encoder_); - CV_Assert( err == 0 ); +GUID EncodingProfileGuid(const EncodeProfile encodingProfile) { + switch (encodingProfile) { + case(ENC_CODEC_PROFILE_AUTOSELECT): return NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID; + case(ENC_H264_PROFILE_BASELINE): return NV_ENC_H264_PROFILE_BASELINE_GUID; + case(ENC_H264_PROFILE_MAIN): return NV_ENC_H264_PROFILE_MAIN_GUID; + case(ENC_H264_PROFILE_HIGH): return NV_ENC_H264_PROFILE_HIGH_GUID; + case(ENC_H264_PROFILE_HIGH_444): return NV_ENC_H264_PROFILE_HIGH_444_GUID; + case(ENC_H264_PROFILE_STEREO): return NV_ENC_H264_PROFILE_STEREO_GUID; + case(ENC_H264_PROFILE_PROGRESSIVE_HIGH): return NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID; + case(ENC_H264_PROFILE_CONSTRAINED_HIGH): return NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID; + case(ENC_HEVC_PROFILE_MAIN): return NV_ENC_HEVC_PROFILE_MAIN_GUID; + case(ENC_HEVC_PROFILE_MAIN10): return NV_ENC_HEVC_PROFILE_MAIN10_GUID; + case(ENC_HEVC_PROFILE_FREXT): return NV_ENC_HEVC_PROFILE_FREXT_GUID; + default: break; } + std::string msg = "Unknown Encoding Profile."; + CV_LOG_WARNING(NULL, msg); + CV_Error(Error::StsUnsupportedFormat, msg); +} - // UYVY/YUY2 are both 4:2:2 formats (16bpc) - // Luma, U, V are interleaved, chroma is subsampled (w/2,h) - void copyUYVYorYUY2Frame(Size frameSize, const GpuMat& src, GpuMat& dst) - { - // Source is YUVY/YUY2 4:2:2, the YUV data in a packed and interleaved - - // YUV Copy setup - CUDA_MEMCPY2D stCopyYUV422; - memset(&stCopyYUV422, 0, sizeof(CUDA_MEMCPY2D)); - - stCopyYUV422.srcXInBytes = 0; - stCopyYUV422.srcY = 0; - stCopyYUV422.srcMemoryType = CU_MEMORYTYPE_DEVICE; - stCopyYUV422.srcHost = 0; - stCopyYUV422.srcDevice = (CUdeviceptr) src.data; - stCopyYUV422.srcArray = 0; - stCopyYUV422.srcPitch = src.step; - - stCopyYUV422.dstXInBytes = 0; - stCopyYUV422.dstY = 0; - stCopyYUV422.dstMemoryType = CU_MEMORYTYPE_DEVICE; - stCopyYUV422.dstHost = 0; - 
stCopyYUV422.dstDevice = (CUdeviceptr) dst.data; - stCopyYUV422.dstArray = 0; - stCopyYUV422.dstPitch = dst.step; - - stCopyYUV422.WidthInBytes = frameSize.width * 2; - stCopyYUV422.Height = frameSize.height; - - // DMA Luma/Chroma - cuSafeCall( cuMemcpy2D(&stCopyYUV422) ); +GUID EncodingPresetGuid(const EncodePreset nvPreset) { + switch (nvPreset) { + case ENC_PRESET_P1: return NV_ENC_PRESET_P1_GUID; + case ENC_PRESET_P2: return NV_ENC_PRESET_P2_GUID; + case ENC_PRESET_P3: return NV_ENC_PRESET_P3_GUID; + case ENC_PRESET_P4: return NV_ENC_PRESET_P4_GUID; + case ENC_PRESET_P5: return NV_ENC_PRESET_P5_GUID; + case ENC_PRESET_P6: return NV_ENC_PRESET_P6_GUID; + case ENC_PRESET_P7: return NV_ENC_PRESET_P7_GUID; + default: break; } + std::string msg = "Unknown Nvidia Encoding Preset."; + CV_LOG_WARNING(NULL, msg); + CV_Error(Error::StsUnsupportedFormat, msg); +} - // YV12/IYUV are both 4:2:0 planar formats (12bpc) - // Luma, U, V chroma planar (12bpc), chroma is subsampled (w/2,h/2) - void copyYV12orIYUVFrame(Size frameSize, const GpuMat& src, GpuMat& dst) - { - // Source is YV12/IYUV, this native format is converted to NV12 format by the video encoder - - // (1) luma copy setup - CUDA_MEMCPY2D stCopyLuma; - memset(&stCopyLuma, 0, sizeof(CUDA_MEMCPY2D)); - - stCopyLuma.srcXInBytes = 0; - stCopyLuma.srcY = 0; - stCopyLuma.srcMemoryType = CU_MEMORYTYPE_DEVICE; - stCopyLuma.srcHost = 0; - stCopyLuma.srcDevice = (CUdeviceptr) src.data; - stCopyLuma.srcArray = 0; - stCopyLuma.srcPitch = src.step; - - stCopyLuma.dstXInBytes = 0; - stCopyLuma.dstY = 0; - stCopyLuma.dstMemoryType = CU_MEMORYTYPE_DEVICE; - stCopyLuma.dstHost = 0; - stCopyLuma.dstDevice = (CUdeviceptr) dst.data; - stCopyLuma.dstArray = 0; - stCopyLuma.dstPitch = dst.step; - - stCopyLuma.WidthInBytes = frameSize.width; - stCopyLuma.Height = frameSize.height; - - // (2) chroma copy setup, U/V can be done together - CUDA_MEMCPY2D stCopyChroma; - memset(&stCopyChroma, 0, sizeof(CUDA_MEMCPY2D)); - - stCopyChroma.srcXInBytes = 0; - stCopyChroma.srcY = frameSize.height << 1; // U/V chroma offset - stCopyChroma.srcMemoryType = CU_MEMORYTYPE_DEVICE; - stCopyChroma.srcHost = 0; - stCopyChroma.srcDevice = (CUdeviceptr) src.data; - stCopyChroma.srcArray = 0; - stCopyChroma.srcPitch = src.step >> 1; // chroma is subsampled by 2 (but it has U/V are next to each other) - - stCopyChroma.dstXInBytes = 0; - stCopyChroma.dstY = frameSize.height << 1; // chroma offset (srcY*srcPitch now points to the chroma planes) - stCopyChroma.dstMemoryType = CU_MEMORYTYPE_DEVICE; - stCopyChroma.dstHost = 0; - stCopyChroma.dstDevice = (CUdeviceptr) dst.data; - stCopyChroma.dstArray = 0; - stCopyChroma.dstPitch = dst.step >> 1; - - stCopyChroma.WidthInBytes = frameSize.width >> 1; - stCopyChroma.Height = frameSize.height; // U/V are sent together - - // DMA Luma - cuSafeCall( cuMemcpy2D(&stCopyLuma) ); - - // DMA Chroma channels (UV side by side) - cuSafeCall( cuMemcpy2D(&stCopyChroma) ); - } +bool Equal(const GUID& g1, const GUID& g2) { + if (std::tie(g1.Data1, g1.Data2, g1.Data3, g1.Data4) == std::tie(g2.Data1, g2.Data2, g2.Data3, g2.Data4)) + return true; + return false; +} - // NV12 is 4:2:0 format (12bpc) - // Luma followed by U/V chroma interleaved (12bpc), chroma is subsampled (w/2,h/2) - void copyNV12Frame(Size frameSize, const GpuMat& src, GpuMat& dst) - { - // Source is NV12 in pitch linear memory - // Because we are assume input is NV12 (if we take input in the native format), the encoder handles NV12 as a native format in pitch linear memory - - // Luma/Chroma 
can be done in a single transfer - CUDA_MEMCPY2D stCopyNV12; - memset(&stCopyNV12, 0, sizeof(CUDA_MEMCPY2D)); - - stCopyNV12.srcXInBytes = 0; - stCopyNV12.srcY = 0; - stCopyNV12.srcMemoryType = CU_MEMORYTYPE_DEVICE; - stCopyNV12.srcHost = 0; - stCopyNV12.srcDevice = (CUdeviceptr) src.data; - stCopyNV12.srcArray = 0; - stCopyNV12.srcPitch = src.step; - - stCopyNV12.dstXInBytes = 0; - stCopyNV12.dstY = 0; - stCopyNV12.dstMemoryType = CU_MEMORYTYPE_DEVICE; - stCopyNV12.dstHost = 0; - stCopyNV12.dstDevice = (CUdeviceptr) dst.data; - stCopyNV12.dstArray = 0; - stCopyNV12.dstPitch = dst.step; - - stCopyNV12.WidthInBytes = frameSize.width; - stCopyNV12.Height = (frameSize.height * 3) >> 1; - - // DMA Luma/Chroma - cuSafeCall( cuMemcpy2D(&stCopyNV12) ); - } +void VideoWriterImpl::InitializeEncoder(const GUID codec, const double fps) +{ + NV_ENC_INITIALIZE_PARAMS initializeParams = {}; + initializeParams.version = NV_ENC_INITIALIZE_PARAMS_VER; + NV_ENC_CONFIG encodeConfig = {}; + encodeConfig.version = NV_ENC_CONFIG_VER; + initializeParams.encodeConfig = &encodeConfig; + pEnc->CreateDefaultEncoderParams(&initializeParams, codec, EncodingPresetGuid(encoderParams.nvPreset), (NV_ENC_TUNING_INFO)encoderParams.tuningInfo); + FrameRate(fps, initializeParams.frameRateNum, initializeParams.frameRateDen); + initializeParams.encodeConfig->profileGUID = EncodingProfileGuid(encoderParams.encodingProfile); + initializeParams.encodeConfig->rcParams.rateControlMode = (NV_ENC_PARAMS_RC_MODE)(encoderParams.rateControlMode + encoderParams.multiPassEncoding); + initializeParams.encodeConfig->rcParams.constQP = { encoderParams.constQp.qpInterB, encoderParams.constQp.qpInterB,encoderParams.constQp.qpInterB }; + initializeParams.encodeConfig->rcParams.averageBitRate = encoderParams.averageBitRate; + initializeParams.encodeConfig->rcParams.maxBitRate = encoderParams.maxBitRate; + initializeParams.encodeConfig->rcParams.targetQuality = encoderParams.targetQuality; + initializeParams.encodeConfig->gopLength = encoderParams.gopLength; + if (Equal(codec, NV_ENC_CODEC_H264_GUID)) + initializeParams.encodeConfig->encodeCodecConfig.h264Config.idrPeriod = encoderParams.gopLength; + else if (Equal(codec, NV_ENC_CODEC_HEVC_GUID)) + initializeParams.encodeConfig->encodeCodecConfig.hevcConfig.idrPeriod = encoderParams.gopLength; + pEnc->CreateEncoder(&initializeParams); +} - void VideoWriterImpl::write(InputArray _frame, bool lastFrame) - { - GpuMat frame = _frame.getGpuMat(); +inline bool CvFormat(const ColorFormat cf) { + if (cf == ColorFormat::BGR || cf == ColorFormat::RGB || cf == ColorFormat::BGRA || cf == ColorFormat::RGBA || cf == ColorFormat::GRAY) + return true; + return false; +} - if (inputFormat_ == SF_BGR) - { - CV_Assert( frame.size() == frameSize_ ); - CV_Assert( frame.type() == CV_8UC1 || frame.type() == CV_8UC3 || frame.type() == CV_8UC4 ); - } - else - { - CV_Assert( frame.size() == videoFrame_.size() ); - CV_Assert( frame.type() == videoFrame_.type() ); +void VideoWriterImpl::CopyToNvSurface(const InputArray src) +{ + const NvEncInputFrame* encoderInputFrame = pEnc->GetNextInputFrame(); + CV_Assert(src.isGpuMat() || src.isMat()); + if (CvFormat(colorFormat)) + CV_Assert(src.size() == Size(pEnc->GetEncodeWidth(), pEnc->GetEncodeHeight())); + Npp8u* dst = (Npp8u*)encoderInputFrame->inputPtr; + if (colorFormat == ColorFormat::BGR || colorFormat == ColorFormat::RGB) { + GpuMat srcDevice; + if (src.isGpuMat()) + srcDevice = src.getGpuMat(); + else { + if (stream) + srcDevice.upload(src, stream); + else + 
srcDevice.upload(src); } - - NVVE_EncodeFrameParams efparams; - efparams.Width = frameSize_.width; - efparams.Height = frameSize_.height; - efparams.Pitch = static_cast(videoFrame_.step); - efparams.SurfFmt = surfaceFormat_; - efparams.PictureStruc = FRAME_PICTURE; - efparams.topfieldfirst = 0; - efparams.repeatFirstField = 0; - efparams.progressiveFrame = (surfaceFormat_ == NV12) ? 1 : 0; - efparams.bLast = lastFrame; - efparams.picBuf = 0; // Must be set to NULL in order to support device memory input - - // Don't forget we need to lock/unlock between memcopies - cuSafeCall( cuvidCtxLock(cuCtxLock_, 0) ); - - if (inputFormat_ == SF_BGR) - { - RGB_to_YV12(frame, videoFrame_); + if (colorFormat == ColorFormat::BGR) { + GpuMat dstGpuMat(pEnc->GetEncodeHeight(), pEnc->GetEncodeWidth(), CV_8UC4, dst, encoderInputFrame->pitch); + cuda::cvtColor(srcDevice, dstGpuMat, COLOR_BGR2BGRA, 0, stream); } - else - { - switch (surfaceFormat_) - { - case UYVY: // UYVY (4:2:2) - case YUY2: // YUY2 (4:2:2) - copyUYVYorYUY2Frame(frameSize_, frame, videoFrame_); - break; - - case YV12: // YV12 (4:2:0), Y V U - case IYUV: // IYUV (4:2:0), Y U V - copyYV12orIYUVFrame(frameSize_, frame, videoFrame_); - break; - - case NV12: // NV12 (4:2:0) - copyNV12Frame(frameSize_, frame, videoFrame_); - break; - } + else { + GpuMat dstGpuMat(pEnc->GetEncodeHeight(), pEnc->GetEncodeWidth(), CV_8UC4, dst, encoderInputFrame->pitch); + cuda::cvtColor(srcDevice, dstGpuMat, COLOR_RGB2RGBA, 0, stream); } - - cuSafeCall( cuvidCtxUnlock(cuCtxLock_, 0) ); - - int err = NVEncodeFrame(encoder_, &efparams, 0, videoFrame_.data); - CV_Assert( err == 0 ); - } - - unsigned char* NVENCAPI VideoWriterImpl::HandleAcquireBitStream(int* pBufferSize, void* pUserdata) - { - VideoWriterImpl* thiz = static_cast(pUserdata); - - return thiz->callback_->acquireBitStream(pBufferSize); - } - - void NVENCAPI VideoWriterImpl::HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata) - { - VideoWriterImpl* thiz = static_cast(pUserdata); - - thiz->callback_->releaseBitStream(cb, nBytesInBuffer); } - - void NVENCAPI VideoWriterImpl::HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata) - { - VideoWriterImpl* thiz = static_cast(pUserdata); - - thiz->callback_->onBeginFrame(pbfi->nFrameNumber, static_cast(pbfi->nPicType)); - } - - void NVENCAPI VideoWriterImpl::HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata) - { - VideoWriterImpl* thiz = static_cast(pUserdata); - - thiz->callback_->onEndFrame(pefi->nFrameNumber, static_cast(pefi->nPicType)); - } - - /////////////////////////////////////////////////////////////////////////// - // FFMPEG - - class EncoderCallBackFFMPEG : public EncoderCallBack - { - public: - EncoderCallBackFFMPEG(const String& fileName, Size frameSize, double fps); - ~EncoderCallBackFFMPEG(); - - unsigned char* acquireBitStream(int* bufferSize); - void releaseBitStream(unsigned char* data, int size); - void onBeginFrame(int frameNumber, PicType picType); - void onEndFrame(int frameNumber, PicType picType); - - private: - static bool init_MediaStream_FFMPEG(); - - struct OutputMediaStream_FFMPEG* stream_; - std::vector buf_; - bool isKeyFrame_; - - static Create_OutputMediaStream_FFMPEG_Plugin create_OutputMediaStream_FFMPEG_p; - static Release_OutputMediaStream_FFMPEG_Plugin release_OutputMediaStream_FFMPEG_p; - static Write_OutputMediaStream_FFMPEG_Plugin write_OutputMediaStream_FFMPEG_p; - }; - - Create_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::create_OutputMediaStream_FFMPEG_p = 
0; - Release_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::release_OutputMediaStream_FFMPEG_p = 0; - Write_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::write_OutputMediaStream_FFMPEG_p = 0; - - bool EncoderCallBackFFMPEG::init_MediaStream_FFMPEG() - { - static bool initialized = false; - - if (!initialized) - { - #if defined(_WIN32) - const char* module_name = "opencv_ffmpeg" - CVAUX_STR(CV_VERSION_MAJOR) CVAUX_STR(CV_VERSION_MINOR) CVAUX_STR(CV_VERSION_REVISION) - #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__) - "_64" - #endif - ".dll"; - - static HMODULE cvFFOpenCV = LoadLibrary(module_name); - - if (cvFFOpenCV) - { - create_OutputMediaStream_FFMPEG_p = - (Create_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "create_OutputMediaStream_FFMPEG"); - release_OutputMediaStream_FFMPEG_p = - (Release_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "release_OutputMediaStream_FFMPEG"); - write_OutputMediaStream_FFMPEG_p = - (Write_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "write_OutputMediaStream_FFMPEG"); - - initialized = create_OutputMediaStream_FFMPEG_p != 0 && release_OutputMediaStream_FFMPEG_p != 0 && write_OutputMediaStream_FFMPEG_p != 0; - } - #elif defined(HAVE_FFMPEG) - create_OutputMediaStream_FFMPEG_p = create_OutputMediaStream_FFMPEG; - release_OutputMediaStream_FFMPEG_p = release_OutputMediaStream_FFMPEG; - write_OutputMediaStream_FFMPEG_p = write_OutputMediaStream_FFMPEG; - - initialized = true; - #endif + else if (colorFormat == ColorFormat::GRAY) { + const cudaMemcpyKind memcpyKind = src.isGpuMat() ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice; + const void* srcPtr = src.isGpuMat() ? src.getGpuMat().data : src.getMat().data; + const size_t srcPitch = src.isGpuMat() ? 
src.getGpuMat().step : src.getMat().step; + const uint32_t chromaHeight = NvEncoder::GetChromaHeight(NV_ENC_BUFFER_FORMAT_NV12, pEnc->GetEncodeHeight()); + if (stream) { + cudaMemcpy2DAsync(dst, encoderInputFrame->pitch, srcPtr, srcPitch, pEnc->GetEncodeWidth(), pEnc->GetEncodeHeight(), memcpyKind, + cuda::StreamAccessor::getStream(stream)); + cudaMemset2DAsync(&dst[encoderInputFrame->pitch * pEnc->GetEncodeHeight()], encoderInputFrame->pitch, 128, pEnc->GetEncodeWidth(), chromaHeight, + cuda::StreamAccessor::getStream(stream)); + } + else { + cudaMemcpy2D(dst, encoderInputFrame->pitch, srcPtr, srcPitch, pEnc->GetEncodeWidth(), pEnc->GetEncodeHeight(), memcpyKind); + cudaMemset2D(&dst[encoderInputFrame->pitch * pEnc->GetEncodeHeight()], encoderInputFrame->pitch, 128, pEnc->GetEncodeWidth(), chromaHeight); } - - return initialized; - } - - EncoderCallBackFFMPEG::EncoderCallBackFFMPEG(const String& fileName, Size frameSize, double fps) : - stream_(0), isKeyFrame_(false) - { - int buf_size = std::max(frameSize.area() * 4, 1024 * 1024); - buf_.resize(buf_size); - - CV_Assert( init_MediaStream_FFMPEG() ); - - stream_ = create_OutputMediaStream_FFMPEG_p(fileName.c_str(), frameSize.width, frameSize.height, fps); - CV_Assert( stream_ != 0 ); - } - - EncoderCallBackFFMPEG::~EncoderCallBackFFMPEG() - { - release_OutputMediaStream_FFMPEG_p(stream_); - } - - unsigned char* EncoderCallBackFFMPEG::acquireBitStream(int* bufferSize) - { - *bufferSize = static_cast(buf_.size()); - return &buf_[0]; - } - - void EncoderCallBackFFMPEG::releaseBitStream(unsigned char* data, int size) - { - write_OutputMediaStream_FFMPEG_p(stream_, data, size, isKeyFrame_); - } - - void EncoderCallBackFFMPEG::onBeginFrame(int frameNumber, PicType picType) - { - CV_UNUSED(frameNumber); - isKeyFrame_ = (picType == IFRAME); } - - void EncoderCallBackFFMPEG::onEndFrame(int frameNumber, PicType picType) - { - CV_UNUSED(frameNumber); - CV_UNUSED(picType); + else { + void* srcPtr = src.isGpuMat() ? src.getGpuMat().data : src.getMat().data; + const CUmemorytype cuMemoryType = src.isGpuMat() ? 
CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST; + NvEncoderCuda::CopyToDeviceFrame(cuContext, srcPtr, static_cast(src.step()), (CUdeviceptr)encoderInputFrame->inputPtr, (int)encoderInputFrame->pitch, pEnc->GetEncodeWidth(), + pEnc->GetEncodeHeight(), cuMemoryType, encoderInputFrame->bufferFormat, encoderInputFrame->chromaOffsets, encoderInputFrame->numChromaPlanes, + false, cuda::StreamAccessor::getStream(stream)); } } -/////////////////////////////////////////////////////////////////////////// -// EncoderParams - -cv::cudacodec::EncoderParams::EncoderParams() -{ - P_Interval = 3; - IDR_Period = 15; - DynamicGOP = 0; - RCType = 1; - AvgBitrate = 4000000; - PeakBitrate = 10000000; - QP_Level_Intra = 25; - QP_Level_InterP = 28; - QP_Level_InterB = 31; - DeblockMode = 1; - ProfileLevel = 65357; - ForceIntra = 0; - ForceIDR = 0; - ClearStat = 0; - DIMode = 1; - Presets = 2; - DisableCabac = 0; - NaluFramingType = 0; - DisableSPSPPS = 0; -} - -cv::cudacodec::EncoderParams::EncoderParams(const String& configFile) -{ - load(configFile); -} - -void cv::cudacodec::EncoderParams::load(const String& configFile) -{ - FileStorage fs(configFile, FileStorage::READ); - CV_Assert( fs.isOpened() ); - - read(fs["P_Interval" ], P_Interval, 3); - read(fs["IDR_Period" ], IDR_Period, 15); - read(fs["DynamicGOP" ], DynamicGOP, 0); - read(fs["RCType" ], RCType, 1); - read(fs["AvgBitrate" ], AvgBitrate, 4000000); - read(fs["PeakBitrate" ], PeakBitrate, 10000000); - read(fs["QP_Level_Intra" ], QP_Level_Intra, 25); - read(fs["QP_Level_InterP"], QP_Level_InterP, 28); - read(fs["QP_Level_InterB"], QP_Level_InterB, 31); - read(fs["DeblockMode" ], DeblockMode, 1); - read(fs["ProfileLevel" ], ProfileLevel, 65357); - read(fs["ForceIntra" ], ForceIntra, 0); - read(fs["ForceIDR" ], ForceIDR, 0); - read(fs["ClearStat" ], ClearStat, 0); - read(fs["DIMode" ], DIMode, 1); - read(fs["Presets" ], Presets, 2); - read(fs["DisableCabac" ], DisableCabac, 0); - read(fs["NaluFramingType"], NaluFramingType, 0); - read(fs["DisableSPSPPS" ], DisableSPSPPS, 0); -} - -void cv::cudacodec::EncoderParams::save(const String& configFile) const -{ - FileStorage fs(configFile, FileStorage::WRITE); - CV_Assert( fs.isOpened() ); - - write(fs, "P_Interval" , P_Interval); - write(fs, "IDR_Period" , IDR_Period); - write(fs, "DynamicGOP" , DynamicGOP); - write(fs, "RCType" , RCType); - write(fs, "AvgBitrate" , AvgBitrate); - write(fs, "PeakBitrate" , PeakBitrate); - write(fs, "QP_Level_Intra" , QP_Level_Intra); - write(fs, "QP_Level_InterP", QP_Level_InterP); - write(fs, "QP_Level_InterB", QP_Level_InterB); - write(fs, "DeblockMode" , DeblockMode); - write(fs, "ProfileLevel" , ProfileLevel); - write(fs, "ForceIntra" , ForceIntra); - write(fs, "ForceIDR" , ForceIDR); - write(fs, "ClearStat" , ClearStat); - write(fs, "DIMode" , DIMode); - write(fs, "Presets" , Presets); - write(fs, "DisableCabac" , DisableCabac); - write(fs, "NaluFramingType", NaluFramingType); - write(fs, "DisableSPSPPS" , DisableSPSPPS); -} +void VideoWriterImpl::write(const InputArray frame) { + CV_Assert(frame.channels() == nSrcChannels); + CopyToNvSurface(frame); + pEnc->EncodeFrame(vPacket); + encoderCallback->onEncoded(vPacket); +}; -/////////////////////////////////////////////////////////////////////////// -// createVideoWriter +EncoderParams VideoWriterImpl::getEncoderParams() const { + return encoderParams; +}; -Ptr cv::cudacodec::createVideoWriter(const String& fileName, Size frameSize, double fps, SurfaceFormat format) +Ptr createVideoWriter(const String& fileName, const Size 
frameSize, const Codec codec, const double fps, const ColorFormat colorFormat, + Ptr encoderCallback, const Stream& stream) { - Ptr encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps)); - return createVideoWriter(encoderCallback, frameSize, fps, format); + encoderCallback = encoderCallback ? encoderCallback : new RawVideoWriter(fileName); + return makePtr(encoderCallback, frameSize, codec, fps, colorFormat, stream); } -Ptr cv::cudacodec::createVideoWriter(const String& fileName, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format) +Ptr createVideoWriter(const String& fileName, const Size frameSize, const Codec codec, const double fps, const ColorFormat colorFormat, + const EncoderParams& params, Ptr encoderCallback, const Stream& stream) { - Ptr encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps)); - return createVideoWriter(encoderCallback, frameSize, fps, params, format); + encoderCallback = encoderCallback ? encoderCallback : new RawVideoWriter(fileName); + return makePtr(encoderCallback, frameSize, codec, fps, colorFormat, params, stream); } -Ptr cv::cudacodec::createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, SurfaceFormat format) -{ - return makePtr(encoderCallback, frameSize, fps, format); -} - -Ptr cv::cudacodec::createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format) -{ - return makePtr(encoderCallback, frameSize, fps, params, format); -} +#endif // !defined HAVE_NVCUVENC -#endif // !defined HAVE_NVCUVENC || !defined _WIN32 || defined HAVE_FFMPEG_WRAPPER +}} diff --git a/modules/cudacodec/test/test_video.cpp b/modules/cudacodec/test/test_video.cpp index b194ffa2f8b..b9b4e9f25c6 100644 --- a/modules/cudacodec/test/test_video.cpp +++ b/modules/cudacodec/test/test_video.cpp @@ -143,8 +143,8 @@ CUDA_TEST_P(CheckExtraData, Reader) ASSERT_EQ(extraDataIdx, 1 ); ASSERT_TRUE(reader->grab()); cv::Mat extraData; - const bool newData = reader->retrieve(extraData, extraDataIdx); - ASSERT_TRUE(newData && sz || !newData && !sz); + const bool newData = reader->retrieve(extraData, static_cast(extraDataIdx)); + ASSERT_TRUE((newData && sz) || (!newData && !sz)); ASSERT_EQ(extraData.total(), sz); } @@ -170,11 +170,11 @@ CUDA_TEST_P(CheckKeyFrame, Reader) ASSERT_TRUE(reader->grab()); double N = -1; ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB,N)); - for (int i = rawIdxBase; i < N + rawIdxBase; i++) { + for (int i = static_cast(rawIdxBase); i < static_cast(N + rawIdxBase); i++) { nPackages++; double containsKeyFrame = i; ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_LRF_HAS_KEY_FRAME, containsKeyFrame)); - ASSERT_TRUE(nPackages == 1 && containsKeyFrame || nPackages == 2 && !containsKeyFrame) << "nPackage: " << i; + ASSERT_TRUE((nPackages == 1 && containsKeyFrame) || (nPackages == 2 && !containsKeyFrame)) << "nPackage: " << i; if (nPackages >= maxNPackagesToCheck) break; } @@ -192,17 +192,18 @@ CUDA_TEST_P(Scaling, Reader) GpuMat frameOr; { cv::Ptr readerGs = cv::cudacodec::createVideoReader(inputFile); - readerGs->set(cudacodec::ColorFormat::GRAY); + ASSERT_TRUE(readerGs->set(cudacodec::ColorFormat::GRAY)); ASSERT_TRUE(readerGs->nextFrame(frameOr)); } cudacodec::VideoReaderInitParams params; - params.targetSz = Size(frameOr.cols * targetSzIn.width, frameOr.rows * targetSzIn.height); - params.srcRoi = Rect(frameOr.cols * srcRoiIn.x, frameOr.rows * srcRoiIn.y, frameOr.cols * 
diff --git a/modules/cudacodec/test/test_video.cpp b/modules/cudacodec/test/test_video.cpp
index b194ffa2f8b..b9b4e9f25c6 100644
--- a/modules/cudacodec/test/test_video.cpp
+++ b/modules/cudacodec/test/test_video.cpp
@@ -143,8 +143,8 @@ CUDA_TEST_P(CheckExtraData, Reader)
     ASSERT_EQ(extraDataIdx, 1 );
     ASSERT_TRUE(reader->grab());
     cv::Mat extraData;
-    const bool newData = reader->retrieve(extraData, extraDataIdx);
-    ASSERT_TRUE(newData && sz || !newData && !sz);
+    const bool newData = reader->retrieve(extraData, static_cast<size_t>(extraDataIdx));
+    ASSERT_TRUE((newData && sz) || (!newData && !sz));
     ASSERT_EQ(extraData.total(), sz);
 }
@@ -170,11 +170,11 @@ CUDA_TEST_P(CheckKeyFrame, Reader)
     ASSERT_TRUE(reader->grab());
     double N = -1;
     ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB,N));
-    for (int i = rawIdxBase; i < N + rawIdxBase; i++) {
+    for (int i = static_cast<int>(rawIdxBase); i < static_cast<int>(N + rawIdxBase); i++) {
         nPackages++;
         double containsKeyFrame = i;
         ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_LRF_HAS_KEY_FRAME, containsKeyFrame));
-        ASSERT_TRUE(nPackages == 1 && containsKeyFrame || nPackages == 2 && !containsKeyFrame) << "nPackage: " << i;
+        ASSERT_TRUE((nPackages == 1 && containsKeyFrame) || (nPackages == 2 && !containsKeyFrame)) << "nPackage: " << i;
         if (nPackages >= maxNPackagesToCheck)
             break;
     }
@@ -192,17 +192,18 @@ CUDA_TEST_P(Scaling, Reader)
     GpuMat frameOr;
     {
         cv::Ptr<cv::cudacodec::VideoReader> readerGs = cv::cudacodec::createVideoReader(inputFile);
-        readerGs->set(cudacodec::ColorFormat::GRAY);
+        ASSERT_TRUE(readerGs->set(cudacodec::ColorFormat::GRAY));
         ASSERT_TRUE(readerGs->nextFrame(frameOr));
     }
     cudacodec::VideoReaderInitParams params;
-    params.targetSz = Size(frameOr.cols * targetSzIn.width, frameOr.rows * targetSzIn.height);
-    params.srcRoi = Rect(frameOr.cols * srcRoiIn.x, frameOr.rows * srcRoiIn.y, frameOr.cols * srcRoiIn.width, frameOr.rows * srcRoiIn.height);
-    params.targetRoi = Rect(params.targetSz.width * targetRoiIn.x, params.targetSz.height * targetRoiIn.y, params.targetSz.width * targetRoiIn.width,
-        params.targetSz.height * targetRoiIn.height);
+    params.targetSz = Size(static_cast<int>(frameOr.cols * targetSzIn.width), static_cast<int>(frameOr.rows * targetSzIn.height));
+    params.srcRoi = Rect(static_cast<int>(frameOr.cols * srcRoiIn.x), static_cast<int>(frameOr.rows * srcRoiIn.y), static_cast<int>(frameOr.cols * srcRoiIn.width),
+        static_cast<int>(frameOr.rows * srcRoiIn.height));
+    params.targetRoi = Rect(static_cast<int>(params.targetSz.width * targetRoiIn.x), static_cast<int>(params.targetSz.height * targetRoiIn.y),
+        static_cast<int>(params.targetSz.width * targetRoiIn.width), static_cast<int>(params.targetSz.height * targetRoiIn.height));
     cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {}, params);
-    reader->set(cudacodec::ColorFormat::GRAY);
+    ASSERT_TRUE(reader->set(cudacodec::ColorFormat::GRAY));
     GpuMat frame;
     ASSERT_TRUE(reader->nextFrame(frame));
     const cudacodec::FormatInfo format = reader->format();
@@ -239,24 +240,25 @@ CUDA_TEST_P(Video, Reader)
         {cudacodec::ColorFormat::GRAY,1},
         {cudacodec::ColorFormat::BGR,3},
         {cudacodec::ColorFormat::BGRA,4},
-        {cudacodec::ColorFormat::YUV,1}
+        {cudacodec::ColorFormat::NV_NV12,1}
     };
     std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + GET_PARAM(1);
     cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile);
+    ASSERT_FALSE(reader->set(cudacodec::ColorFormat::RGB));
     cv::cudacodec::FormatInfo fmt = reader->format();
     cv::cuda::GpuMat frame;
     for (int i = 0; i < 10; i++)
     {
         // request a different colour format for each frame
         const std::pair< cudacodec::ColorFormat, int>& formatToChannels = formatsToChannels[i % formatsToChannels.size()];
-        reader->set(formatToChannels.first);
+        ASSERT_TRUE(reader->set(formatToChannels.first));
         double colorFormat;
         ASSERT_TRUE(reader->get(cudacodec::VideoReaderProps::PROP_COLOR_FORMAT, colorFormat) && static_cast<cudacodec::ColorFormat>(colorFormat) == formatToChannels.first);
         ASSERT_TRUE(reader->nextFrame(frame));
         if(!fmt.valid)
             fmt = reader->format();
-        const int height = formatToChannels.first == cudacodec::ColorFormat::YUV ? 1.5 * fmt.height : fmt.height;
+        const int height = formatToChannels.first == cudacodec::ColorFormat::NV_NV12 ? static_cast<int>(1.5 * fmt.height) : fmt.height;
         ASSERT_TRUE(frame.cols == fmt.width && frame.rows == height);
         ASSERT_FALSE(frame.empty());
         ASSERT_TRUE(frame.channels() == formatToChannels.second);
@@ -291,9 +293,9 @@ CUDA_TEST_P(VideoReadRaw, Reader)
             double N = -1;
             ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB,N));
             ASSERT_TRUE(N >= 0) << N << " < 0";
-            for (int i = rawIdxBase; i <= N + rawIdxBase; i++) {
+            for (int j = static_cast<int>(rawIdxBase); j <= static_cast<int>(N + rawIdxBase); j++) {
                 Mat rawPackets;
-                reader->retrieve(rawPackets, i);
+                reader->retrieve(rawPackets, j);
                 file.write((char*)rawPackets.data, rawPackets.total());
             }
         }
@@ -315,7 +317,7 @@ CUDA_TEST_P(VideoReadRaw, Reader)
         {
             ASSERT_TRUE(readerReference->nextFrame(reference));
             ASSERT_TRUE(readerActual->grab());
-            ASSERT_TRUE(readerActual->retrieve(actual, decodedFrameIdx));
+            ASSERT_TRUE(readerActual->retrieve(actual, static_cast<size_t>(decodedFrameIdx)));
             actual.download(actualHost);
             reference.download(referenceHost);
             ASSERT_TRUE(cvtest::norm(actualHost, referenceHost, NORM_INF) == 0);
@@ -423,63 +425,242 @@ CUDA_TEST_P(CheckInitParams, Reader)
 #endif // HAVE_NVCUVID
-#if defined(_WIN32) && defined(HAVE_NVCUVENC)
-//////////////////////////////////////////////////////
-// VideoWriter
+#if defined(HAVE_NVCUVID) && defined(HAVE_NVCUVENC)
+struct TransCode : testing::TestWithParam<cv::cuda::DeviceInfo>
+{
+    cv::cuda::DeviceInfo devInfo;
+    virtual void SetUp()
+    {
+        devInfo = GetParam();
+        cv::cuda::setDevice(devInfo.deviceID());
+    }
+};
+
-CUDA_TEST_P(Video, Writer)
+CUDA_TEST_P(TransCode, H264ToH265)
 {
-    cv::cuda::setDevice(GET_PARAM(0).deviceID());
+    const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.h264";
+    constexpr cv::cudacodec::ColorFormat colorFormat = cv::cudacodec::ColorFormat::NV_NV12;
+    constexpr double fps = 25;
+    const cudacodec::Codec codec = cudacodec::Codec::HEVC;
+    const std::string ext = ".h265";
+    const std::string outputFile = cv::tempfile(ext.c_str());
+    constexpr int nFrames = 5;
+    Size frameSz;
+    {
+        cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile);
+        cv::cudacodec::FormatInfo fmt = reader->format();
+        reader->set(cudacodec::ColorFormat::NV_NV12);
+        cv::Ptr<cv::cudacodec::VideoWriter> writer;
+        cv::cuda::GpuMat frame;
+        cv::cuda::Stream stream;
+        for (int i = 0; i < nFrames; ++i) {
+            ASSERT_TRUE(reader->nextFrame(frame, stream));
+            if (!fmt.valid) {
+                fmt = reader->format();
+                ASSERT_TRUE(fmt.valid);
+            }
+            ASSERT_FALSE(frame.empty());
+            Mat tst; frame.download(tst);
+            if (writer.empty()) {
+                frameSz = Size(fmt.width, fmt.height);
+                writer = cv::cudacodec::createVideoWriter(outputFile, frameSz, codec, fps, colorFormat, 0, stream);
+            }
+            writer->write(frame);
+        }
+    }
-    const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1);
+    {
+        cv::VideoCapture cap(outputFile);
+        ASSERT_TRUE(cap.isOpened());
+        const int width = static_cast<int>(cap.get(CAP_PROP_FRAME_WIDTH));
+        const int height = static_cast<int>(cap.get(CAP_PROP_FRAME_HEIGHT));
+        ASSERT_EQ(frameSz, Size(width, height));
+        ASSERT_EQ(fps, cap.get(CAP_PROP_FPS));
+        Mat frame;
+        for (int i = 0; i < nFrames; ++i) {
+            cap >> frame;
+            ASSERT_FALSE(frame.empty());
+        }
+    }
+    ASSERT_EQ(0, remove(outputFile.c_str()));
+}
-    std::string outputFile = cv::tempfile(".avi");
-    const double FPS = 25.0;
+INSTANTIATE_TEST_CASE_P(CUDA_Codec, TransCode, ALL_DEVICES);
+#endif
+
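Condensed from the TransCode test above, the decode-to-encode path can keep frames on the device by requesting NV_NV12 from the reader and passing the same surface format to the writer. A rough sketch; the file paths and 25 fps rate are illustrative, and error handling is omitted:

    #include <opencv2/cudacodec.hpp>

    // Sketch: H264 -> HEVC transcode with frames kept on the GPU in NV_NV12.
    void transcodeSketch(const std::string& inFile, const std::string& outFile)
    {
        cv::cuda::Stream stream;
        cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inFile);
        reader->set(cv::cudacodec::ColorFormat::NV_NV12);
        cv::Ptr<cv::cudacodec::VideoWriter> writer;
        cv::cuda::GpuMat frame;
        while (reader->nextFrame(frame, stream)) {
            if (writer.empty()) {
                const cv::cudacodec::FormatInfo fmt = reader->format();
                writer = cv::cudacodec::createVideoWriter(outFile, cv::Size(fmt.width, fmt.height),
                    cv::cudacodec::Codec::HEVC, 25.0, cv::cudacodec::ColorFormat::NV_NV12, 0, stream);
            }
            writer->write(frame);
        }
    }

As in the test, the writer is created lazily once the first decoded frame has fixed the surface size.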
+ +//========================================================================== - cv::VideoCapture reader(inputFile); - ASSERT_TRUE(reader.isOpened()); +void CvtColor(const Mat& in, Mat& out, const cudacodec::ColorFormat surfaceFormatCv) { + switch (surfaceFormatCv) { + case(cudacodec::ColorFormat::RGB): + return cv::cvtColor(in, out, COLOR_BGR2RGB); + case(cudacodec::ColorFormat::BGRA): + return cv::cvtColor(in, out, COLOR_BGR2BGRA); + case(cudacodec::ColorFormat::RGBA): + return cv::cvtColor(in, out, COLOR_BGR2RGBA); + case(cudacodec::ColorFormat::GRAY): + return cv::cvtColor(in, out, COLOR_BGR2GRAY); + default: + in.copyTo(out); + } +} - cv::Ptr d_writer; +PARAM_TEST_CASE(Write, cv::cuda::DeviceInfo, bool, cv::cudacodec::Codec, double, cv::cudacodec::ColorFormat) +{ +}; - cv::Mat frame; - cv::cuda::GpuMat d_frame; +CUDA_TEST_P(Write, Writer) +{ + cv::cuda::setDevice(GET_PARAM(0).deviceID()); + const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.mp4"; + const bool deviceSrc = GET_PARAM(1); + const cudacodec::Codec codec = GET_PARAM(2); + const double fps = GET_PARAM(3); + const cv::cudacodec::ColorFormat colorFormat = GET_PARAM(4); + const std::string ext = codec == cudacodec::Codec::H264 ? ".h264" : ".hevc"; + const std::string outputFile = cv::tempfile(ext.c_str()); + constexpr int nFrames = 5; + Size frameSz; + { + cv::VideoCapture cap(inputFile); + ASSERT_TRUE(cap.isOpened()); + cv::Ptr writer; + cv::Mat frame, frameNewSf; + cv::cuda::GpuMat dFrame; + cv::cuda::Stream stream; + for (int i = 0; i < nFrames; ++i) { + cap >> frame; + ASSERT_FALSE(frame.empty()); + if (writer.empty()) { + frameSz = frame.size(); + writer = cv::cudacodec::createVideoWriter(outputFile, frameSz, codec, fps, colorFormat, 0, stream); + } + CvtColor(frame, frameNewSf, colorFormat); + if (deviceSrc) { + dFrame.upload(frameNewSf); + writer->write(dFrame); + } + else + writer->write(frameNewSf); + } + } - for (int i = 0; i < 10; ++i) { - reader >> frame; - ASSERT_FALSE(frame.empty()); + cv::VideoCapture cap(outputFile); + ASSERT_TRUE(cap.isOpened()); + const int width = static_cast(cap.get(CAP_PROP_FRAME_WIDTH)); + const int height = static_cast(cap.get(CAP_PROP_FRAME_HEIGHT)); + ASSERT_EQ(frameSz, Size(width, height)); + ASSERT_TRUE(abs(fps - cap.get(CAP_PROP_FPS)) < 0.5); + Mat frame; + for (int i = 0; i < nFrames; ++i) { + cap >> frame; + ASSERT_FALSE(frame.empty()); + } + } + ASSERT_EQ(0, remove(outputFile.c_str())); +} - d_frame.upload(frame); +#define DEVICE_SRC true, false +#define FPS 10, 29.7 +#define CODEC cv::cudacodec::Codec::H264, cv::cudacodec::Codec::HEVC +#define COLOR_FORMAT cv::cudacodec::ColorFormat::BGR, cv::cudacodec::ColorFormat::RGB, cv::cudacodec::ColorFormat::BGRA, \ +cv::cudacodec::ColorFormat::RGBA, cv::cudacodec::ColorFormat::GRAY +INSTANTIATE_TEST_CASE_P(CUDA_Codec, Write, testing::Combine(ALL_DEVICES, testing::Values(DEVICE_SRC), testing::Values(CODEC), testing::Values(FPS), + testing::Values(COLOR_FORMAT))); - if (d_writer.empty()) - d_writer = cv::cudacodec::createVideoWriter(outputFile, frame.size(), FPS); - d_writer->write(d_frame); +struct EncoderParams : testing::TestWithParam +{ + cv::cuda::DeviceInfo devInfo; + cv::cudacodec::EncoderParams params; + virtual void SetUp() + { + devInfo = GetParam(); + cv::cuda::setDevice(devInfo.deviceID()); + // Fixed params for CBR test + params.nvPreset = cv::cudacodec::EncodePreset::ENC_PRESET_P7; + params.tuningInfo = cv::cudacodec::EncodeTuningInfo::ENC_TUNING_INFO_HIGH_QUALITY; 
+ params.encodingProfile = cv::cudacodec::EncodeProfile::ENC_H264_PROFILE_MAIN; + params.rateControlMode = cv::cudacodec::EncodeParamsRcMode::ENC_PARAMS_RC_CBR; + params.multiPassEncoding = cv::cudacodec::EncodeMultiPass::ENC_TWO_PASS_FULL_RESOLUTION; + params.averageBitRate = 1000000; + params.maxBitRate = 0; + params.targetQuality = 0; + params.gopLength = 5; } +}; - reader.release(); - d_writer.release(); - reader.open(outputFile); - ASSERT_TRUE(reader.isOpened()); +CUDA_TEST_P(EncoderParams, Writer) +{ + const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.mp4"; + constexpr double fps = 25.0; + constexpr cudacodec::Codec codec = cudacodec::Codec::H264; + const std::string ext = ".h264"; + const std::string outputFile = cv::tempfile(ext.c_str()); + Size frameSz; + constexpr int nFrames = 5; + { + cv::VideoCapture reader(inputFile); + ASSERT_TRUE(reader.isOpened()); + const cv::cudacodec::ColorFormat colorFormat = cv::cudacodec::ColorFormat::BGR; + cv::Ptr writer; + cv::Mat frame; + cv::cuda::GpuMat dFrame; + cv::cuda::Stream stream; + for (int i = 0; i < nFrames; ++i) { + reader >> frame; + ASSERT_FALSE(frame.empty()); + dFrame.upload(frame); + if (writer.empty()) { + frameSz = frame.size(); + writer = cv::cudacodec::createVideoWriter(outputFile, frameSz, codec, fps, colorFormat, params, 0, stream); + cv::cudacodec::EncoderParams paramsOut = writer->getEncoderParams(); + ASSERT_EQ(params, paramsOut); + } + writer->write(dFrame); + } + } - for (int i = 0; i < 5; ++i) { - reader >> frame; - ASSERT_FALSE(frame.empty()); + cv::VideoCapture cap(outputFile); + ASSERT_TRUE(cap.isOpened()); + const int width = static_cast(cap.get(CAP_PROP_FRAME_WIDTH)); + const int height = static_cast(cap.get(CAP_PROP_FRAME_HEIGHT)); + ASSERT_EQ(frameSz, Size(width, height)); + ASSERT_EQ(fps, cap.get(CAP_PROP_FPS)); + const bool checkGop = videoio_registry::hasBackend(CAP_FFMPEG); + Mat frame; + for (int i = 0; i < nFrames; ++i) { + cap >> frame; + ASSERT_FALSE(frame.empty()); + if (checkGop && (cap.get(CAP_PROP_FRAME_TYPE) == 73)) { + ASSERT_TRUE(i % params.gopLength == 0); + } + } } + ASSERT_EQ(0, remove(outputFile.c_str())); } -#endif // _WIN32, HAVE_NVCUVENC +INSTANTIATE_TEST_CASE_P(CUDA_Codec, EncoderParams, ALL_DEVICES); + +#endif // HAVE_NVCUVENC INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckSet, testing::Combine( ALL_DEVICES, testing::Values("highgui/video/big_buck_bunny.mp4"))); #define VIDEO_SRC_SCALING "highgui/video/big_buck_bunny.mp4" -#define TARGET_SZ Size2f(1,1), Size2f(0.8,0.9), Size2f(2.3,1.8) -#define SRC_ROI Rect2f(0,0,1,1), Rect2f(0.25,0.25,0.5,0.5) -#define TARGET_ROI Rect2f(0,0,1,1), Rect2f(0.2,0.3,0.6,0.7) +#define TARGET_SZ Size2f(1,1), Size2f(0.8f,0.9f), Size2f(2.3f,1.8f) +#define SRC_ROI Rect2f(0,0,1,1), Rect2f(0.25f,0.25f,0.5f,0.5f) +#define TARGET_ROI Rect2f(0,0,1,1), Rect2f(0.2f,0.3f,0.6f,0.7f) INSTANTIATE_TEST_CASE_P(CUDA_Codec, Scaling, testing::Combine( ALL_DEVICES, testing::Values(VIDEO_SRC_SCALING), testing::Values(TARGET_SZ), testing::Values(SRC_ROI), testing::Values(TARGET_ROI)));
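The EncoderParams test above doubles as a reference for explicit rate-control configuration. Below is a sketch of the same CBR setup used outside the test harness; the 1 Mbit/s bitrate, GOP length of 5 and 1280x720 frame size are illustrative values only:

    #include <opencv2/cudacodec.hpp>

    // Sketch: constant-bitrate H264 encoding with explicit EncoderParams.
    cv::Ptr<cv::cudacodec::VideoWriter> makeCbrWriter(const std::string& outFile, const cv::cuda::Stream& stream)
    {
        cv::cudacodec::EncoderParams params;
        params.nvPreset = cv::cudacodec::EncodePreset::ENC_PRESET_P7;
        params.tuningInfo = cv::cudacodec::EncodeTuningInfo::ENC_TUNING_INFO_HIGH_QUALITY;
        params.encodingProfile = cv::cudacodec::EncodeProfile::ENC_H264_PROFILE_MAIN;
        params.rateControlMode = cv::cudacodec::EncodeParamsRcMode::ENC_PARAMS_RC_CBR;
        params.multiPassEncoding = cv::cudacodec::EncodeMultiPass::ENC_TWO_PASS_FULL_RESOLUTION;
        params.averageBitRate = 1000000; // 1 Mbit/s CBR target
        params.gopLength = 5;            // keyframe every 5 frames
        cv::Ptr<cv::cudacodec::VideoWriter> writer = cv::cudacodec::createVideoWriter(
            outFile, cv::Size(1280, 720), cv::cudacodec::Codec::H264, 25.0,
            cv::cudacodec::ColorFormat::BGR, params, 0, stream);
        // The parameters round-trip through getEncoderParams(), as the test asserts.
        CV_Assert(writer->getEncoderParams() == params);
        return writer;
    }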