Skip to content

Commit

Permalink
avcodec/cuvid: Add support for P010 as an output surface format
Browse files Browse the repository at this point in the history
The nvidia 375.xx driver introduces support for P016 output surfaces,
for 10bit and 12bit HEVC content (it's also the first driver to support
hardware decoding of 12bit content).

Technically, we don't support P016, but in practice I don't think we
zero out the extra bits in P010, so P010 can be used to carry the data.

This change introduces cuvid decoder support for P010 output to
both hardware and system memory surfaces. For simplicity, it
does not maintain the previous ability to output NV12 for > 8 bit
input video - the user will need to update their driver to decode
such videos.

After this change, both cuvid and nvenc support P010, but the
ffmpeg_cuvid transcoding logic will need more work to connect the
two together. Similarly, the scale_npp filter still only works with
8bit surfaces.
  • Loading branch information
philipl committed Nov 19, 2016
1 parent a930746 commit 5d678ac
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 10 deletions.
3 changes: 2 additions & 1 deletion compat/cuda/dynlink_cuviddec.h
Expand Up @@ -83,7 +83,8 @@ typedef enum cudaVideoCodec_enum {
* Video Surface Formats Enums
*/
typedef enum cudaVideoSurfaceFormat_enum {
cudaVideoSurfaceFormat_NV12=0 /**< NV12 (currently the only supported output format) */
cudaVideoSurfaceFormat_NV12=0, /**< NV12 */
cudaVideoSurfaceFormat_P016=1 /**< P016 */
} cudaVideoSurfaceFormat;

/*!
Expand Down
48 changes: 40 additions & 8 deletions libavcodec/cuvid.c
Expand Up @@ -28,6 +28,7 @@
#include "libavutil/fifo.h"
#include "libavutil/log.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"

#include "avcodec.h"
#include "internal.h"
Expand Down Expand Up @@ -156,7 +157,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
hwframe_ctx->width < avctx->width ||
hwframe_ctx->height < avctx->height ||
hwframe_ctx->format != AV_PIX_FMT_CUDA ||
hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) {
hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
ctx->internal_error = AVERROR(EINVAL);
return 0;
Expand All @@ -177,7 +178,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form

cuinfo.CodecType = ctx->codec_type = format->codec;
cuinfo.ChromaFormat = format->chroma_format;
cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;

switch (avctx->sw_pix_fmt) {
case AV_PIX_FMT_NV12:
cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
break;
case AV_PIX_FMT_P010:
cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
break;
default:
av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12 or P010 are not supported\n");
ctx->internal_error = AVERROR(EINVAL);
return 0;
}

cuinfo.ulWidth = avctx->coded_width;
cuinfo.ulHeight = avctx->coded_height;
Expand Down Expand Up @@ -209,7 +222,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form

if (!hwframe_ctx->pool) {
hwframe_ctx->format = AV_PIX_FMT_CUDA;
hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
hwframe_ctx->sw_format = avctx->sw_pix_fmt;
hwframe_ctx->width = avctx->width;
hwframe_ctx->height = avctx->height;

Expand Down Expand Up @@ -417,7 +430,8 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)

offset += avctx->coded_height;
}
} else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
} else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
avctx->pix_fmt == AV_PIX_FMT_P010) {
AVFrame *tmp_frame = av_frame_alloc();
if (!tmp_frame) {
av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
Expand Down Expand Up @@ -615,15 +629,32 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
const AVBitStreamFilter *bsf;
int ret = 0;

enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
AV_PIX_FMT_NV12,
AV_PIX_FMT_NONE };
enum AVPixelFormat pix_fmts_nv12[3] = { AV_PIX_FMT_CUDA,
AV_PIX_FMT_NV12,
AV_PIX_FMT_NONE };

enum AVPixelFormat pix_fmts_p010[3] = { AV_PIX_FMT_CUDA,
AV_PIX_FMT_P010,
AV_PIX_FMT_NONE };

const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->pix_fmt);
if (!pixdesc) {
av_log(avctx, AV_LOG_ERROR, "av_pix_fmt_desc_get failed: %d\n", ret);
return -1;
}

ret = ff_get_format(avctx, pix_fmts);
ret = ff_get_format(avctx, pixdesc->comp[0].depth > 8 ?
pix_fmts_p010 : pix_fmts_nv12);
if (ret < 0) {
av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
return ret;
}

av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n",
av_get_pix_fmt_name(avctx->pix_fmt),
av_get_pix_fmt_name(ret),
av_get_pix_fmt_name(avctx->sw_pix_fmt));

avctx->pix_fmt = ret;

ret = cuvid_load_functions(&ctx->cvdl);
Expand Down Expand Up @@ -899,6 +930,7 @@ static const AVOption options[] = {
.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
.pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
AV_PIX_FMT_NV12, \
AV_PIX_FMT_P010, \
AV_PIX_FMT_NONE }, \
};

Expand Down
11 changes: 10 additions & 1 deletion libavutil/hwcontext_cuda.c
Expand Up @@ -35,6 +35,7 @@ static const enum AVPixelFormat supported_formats[] = {
AV_PIX_FMT_NV12,
AV_PIX_FMT_YUV420P,
AV_PIX_FMT_YUV444P,
AV_PIX_FMT_P010,
};

static void cuda_buffer_free(void *opaque, uint8_t *data)
Expand Down Expand Up @@ -111,6 +112,7 @@ static int cuda_frames_init(AVHWFramesContext *ctx)
size = aligned_width * ctx->height * 3 / 2;
break;
case AV_PIX_FMT_YUV444P:
case AV_PIX_FMT_P010:
size = aligned_width * ctx->height * 3;
break;
}
Expand All @@ -125,14 +127,21 @@ static int cuda_frames_init(AVHWFramesContext *ctx)

static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
{
int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
int aligned_width;
int width_in_bytes = ctx->width;

if (ctx->sw_format == AV_PIX_FMT_P010) {
width_in_bytes *= 2;
}
aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);

frame->buf[0] = av_buffer_pool_get(ctx->pool);
if (!frame->buf[0])
return AVERROR(ENOMEM);

switch (ctx->sw_format) {
case AV_PIX_FMT_NV12:
case AV_PIX_FMT_P010:
frame->data[0] = frame->buf[0]->data;
frame->data[1] = frame->data[0] + aligned_width * ctx->height;
frame->linesize[0] = aligned_width;
Expand Down

0 comments on commit 5d678ac

Please sign in to comment.