From 379861f445aff86f6a50f9eefd15fa09f697ac56 Mon Sep 17 00:00:00 2001 From: Philip Langdale Date: Wed, 23 Oct 2019 18:01:52 -0700 Subject: [PATCH 1/9] avutil/hwcontext: Add support for HW -> HW transfers We are beginning to consider scenarios where a given HW Context may be able to transfer frames to another HW Context without passing via system memory - this would usually be when two contexts represent different APIs on the same device (eg: Vulkan and CUDA). This is modelled as a transfer, as we have today, but where both the src and the dst are hardware frames with hw contexts. We need to be careful to ensure the contexts are compatible - particularly, we cannot do transfers where one of the frames has been mapped via a derived frames context - we can only do transfers for frames that were directly allocated by the specified context. Additionally, as we have two hardware contexts, the transfer function could be implemented by either (or indeed both). To handle this uncertainty, we explicitly look for ENOSYS as an indicator to try the transfer in the other direction before giving up. --- libavutil/hwcontext.c | 53 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c index f1e404ab2015d..3189391c078b9 100644 --- a/libavutil/hwcontext.c +++ b/libavutil/hwcontext.c @@ -444,21 +444,54 @@ int av_hwframe_transfer_data(AVFrame *dst, const AVFrame *src, int flags) if (!dst->buf[0]) return transfer_data_alloc(dst, src, flags); - if (src->hw_frames_ctx) { - ctx = (AVHWFramesContext*)src->hw_frames_ctx->data; + /* + * Hardware -> Hardware Transfer. + * Unlike Software -> Hardware or Hardware -> Software, the transfer + * function could be provided by either the src or dst, depending on + * the specific combination of hardware. 
+ */ + if (src->hw_frames_ctx && dst->hw_frames_ctx) { + AVHWFramesContext *src_ctx = + (AVHWFramesContext*)src->hw_frames_ctx->data; + AVHWFramesContext *dst_ctx = + (AVHWFramesContext*)dst->hw_frames_ctx->data; + + if (src_ctx->internal->source_frames) { + av_log(src_ctx, AV_LOG_ERROR, + "A device with a derived frame context cannot be used as " + "the source of a HW -> HW transfer."); + return AVERROR(ENOSYS); + } - ret = ctx->internal->hw_type->transfer_data_from(ctx, dst, src); - if (ret < 0) - return ret; - } else if (dst->hw_frames_ctx) { - ctx = (AVHWFramesContext*)dst->hw_frames_ctx->data; + if (dst_ctx->internal->source_frames) { + av_log(src_ctx, AV_LOG_ERROR, + "A device with a derived frame context cannot be used as " + "the destination of a HW -> HW transfer."); + return AVERROR(ENOSYS); + } - ret = ctx->internal->hw_type->transfer_data_to(ctx, dst, src); + ret = src_ctx->internal->hw_type->transfer_data_from(src_ctx, dst, src); + if (ret == AVERROR(ENOSYS)) + ret = dst_ctx->internal->hw_type->transfer_data_to(dst_ctx, dst, src); if (ret < 0) return ret; - } else - return AVERROR(ENOSYS); + } else { + if (src->hw_frames_ctx) { + ctx = (AVHWFramesContext*)src->hw_frames_ctx->data; + + ret = ctx->internal->hw_type->transfer_data_from(ctx, dst, src); + if (ret < 0) + return ret; + } else if (dst->hw_frames_ctx) { + ctx = (AVHWFramesContext*)dst->hw_frames_ctx->data; + ret = ctx->internal->hw_type->transfer_data_to(ctx, dst, src); + if (ret < 0) + return ret; + } else { + return AVERROR(ENOSYS); + } + } return 0; } From 136b0303555f38c3c21641fa26e1e3066da117df Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 28 Aug 2019 21:58:10 +0100 Subject: [PATCH 2/9] lavu: add Vulkan hwcontext code This commit adds the necessary code to initialize and use a Vulkan device within the hwcontext libavutil framework. Currently direct mapping to VAAPI and DRM frames is functional, as well as transfers to CUDA frames and native frames is supported. 
--- configure | 17 +- doc/APIchanges | 4 + libavutil/Makefile | 3 + libavutil/hwcontext.c | 4 + libavutil/hwcontext.h | 1 + libavutil/hwcontext_cuda.c | 121 ++ libavutil/hwcontext_internal.h | 1 + libavutil/hwcontext_vulkan.c | 2803 ++++++++++++++++++++++++++++++++ libavutil/hwcontext_vulkan.h | 150 ++ libavutil/pixdesc.c | 4 + libavutil/pixfmt.h | 3 + 11 files changed, 3105 insertions(+), 6 deletions(-) create mode 100644 libavutil/hwcontext_vulkan.c create mode 100644 libavutil/hwcontext_vulkan.h diff --git a/configure b/configure index eec43c3b06991..6a42981ff1cf4 100755 --- a/configure +++ b/configure @@ -309,6 +309,7 @@ External library support: --enable-openssl enable openssl, needed for https support if gnutls, libtls or mbedtls is not used [no] --enable-pocketsphinx enable PocketSphinx, needed for asr filter [no] + --enable-vulkan enable Vulkan code [no] --disable-sndio disable sndio support [autodetect] --disable-schannel disable SChannel SSP, needed for TLS support on Windows if openssl and gnutls are not used [autodetect] @@ -1549,11 +1550,11 @@ require_cc(){ } require_cpp(){ - name="$1" - headers="$2" - classes="$3" - shift 3 - check_lib_cpp "$headers" "$classes" "$@" || die "ERROR: $name not found" + log require_cpp "$@" + name_version="$1" + name="${1%% *}" + shift + check_lib_cpp "$name" "$@" || die "ERROR: $name_version not found" } require_headers(){ @@ -1854,6 +1855,7 @@ HWACCEL_LIBRARY_LIST=" mmal omx opencl + vulkan " DOCUMENT_LIST=" @@ -3640,7 +3642,7 @@ avformat_deps="avcodec avutil" avformat_suggest="libm network zlib" avresample_deps="avutil" avresample_suggest="libm" -avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt" +avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi vulkan videotoolbox corefoundation corevideo coremedia bcrypt" postproc_deps="avutil gpl" postproc_suggest="libm" swresample_deps="avutil" @@ -6627,6 +6629,9 @@ 
enabled vdpau && enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.h" DtsCrystalHDVersion -lcrystalhd +enabled vulkan && + require_pkg_config vulkan "vulkan >= 1.1.97" "vulkan/vulkan.h" vkCreateInstance + if enabled x86; then case $target_os in mingw32*|mingw64*|win32|win64|linux|cygwin*) diff --git a/doc/APIchanges b/doc/APIchanges index 5b8d801f06acf..394d5eb40a9a2 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,10 @@ libavutil: 2017-10-21 API changes, most recent first: +2020-ww-xx - xxxxxxxxxx - lavu yy.yy.yyy - hwcontext.h + Add AV_PIX_FMT_VULKAN + Add AV_HWDEVICE_TYPE_VULKAN and implementation. + 2019-12-xx - xxxxxxxxxx - lavu 56.37.100 - buffer.h Add av_buffer_pool_buffer_get_opaque(). diff --git a/libavutil/Makefile b/libavutil/Makefile index 57e6e3d7e8544..b189f9abea381 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -43,6 +43,7 @@ HEADERS = adler32.h \ hwcontext_vaapi.h \ hwcontext_videotoolbox.h \ hwcontext_vdpau.h \ + hwcontext_vulkan.h \ imgutils.h \ intfloat.h \ intreadwrite.h \ @@ -175,6 +176,7 @@ OBJS-$(CONFIG_QSV) += hwcontext_qsv.o OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o +OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o OBJS += $(COMPAT_OBJS:%=../compat/%) @@ -191,6 +193,7 @@ SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h +SKIPHEADERS-$(CONFIG_VULKAN) += hwcontext_vulkan.h TESTPROGS = adler32 \ aes \ diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c index 3189391c078b9..d09a15a249156 100644 --- a/libavutil/hwcontext.c +++ b/libavutil/hwcontext.c @@ -58,6 +58,9 @@ static const HWContextType * const hw_table[] = { #endif #if CONFIG_MEDIACODEC &ff_hwcontext_type_mediacodec, +#endif +#if CONFIG_VULKAN + &ff_hwcontext_type_vulkan, 
#endif NULL, }; @@ -73,6 +76,7 @@ static const char *const hw_type_names[] = { [AV_HWDEVICE_TYPE_VDPAU] = "vdpau", [AV_HWDEVICE_TYPE_VIDEOTOOLBOX] = "videotoolbox", [AV_HWDEVICE_TYPE_MEDIACODEC] = "mediacodec", + [AV_HWDEVICE_TYPE_VULKAN] = "vulkan", }; enum AVHWDeviceType av_hwdevice_find_type_by_name(const char *name) diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h index f5a4b62387747..f874af9f8fc1d 100644 --- a/libavutil/hwcontext.h +++ b/libavutil/hwcontext.h @@ -36,6 +36,7 @@ enum AVHWDeviceType { AV_HWDEVICE_TYPE_DRM, AV_HWDEVICE_TYPE_OPENCL, AV_HWDEVICE_TYPE_MEDIACODEC, + AV_HWDEVICE_TYPE_VULKAN, }; typedef struct AVHWDeviceInternal AVHWDeviceInternal; diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index 30611b1912051..18abb87bbdc6d 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -21,6 +21,9 @@ #include "hwcontext.h" #include "hwcontext_internal.h" #include "hwcontext_cuda_internal.h" +#if CONFIG_VULKAN +#include "hwcontext_vulkan.h" +#endif #include "cuda_check.h" #include "mem.h" #include "pixdesc.h" @@ -42,6 +45,9 @@ static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_YUV444P16, AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, +#if CONFIG_VULKAN + AV_PIX_FMT_VULKAN, +#endif }; #define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x) @@ -205,6 +211,10 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, CUcontext dummy; int i, ret; + /* We don't support transfers to HW devices. */ + if (dst->hw_frames_ctx) + return AVERROR(ENOSYS); + ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); if (ret < 0) return ret; @@ -247,6 +257,10 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, CUcontext dummy; int i, ret; + /* We don't support transfers from HW devices. 
*/ + if (src->hw_frames_ctx) + return AVERROR(ENOSYS); + ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); if (ret < 0) return ret; @@ -389,6 +403,112 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx, return AVERROR_UNKNOWN; } +static int cuda_device_derive(AVHWDeviceContext *device_ctx, + AVHWDeviceContext *src_ctx, + int flags) { + AVCUDADeviceContext *hwctx = device_ctx->hwctx; + CudaFunctions *cu; + const char *src_uuid = NULL; + CUcontext dummy; + int ret, i, device_count, dev_active = 0; + unsigned int dev_flags = 0; + + const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC; + + switch (src_ctx->type) { +#if CONFIG_VULKAN + case AV_HWDEVICE_TYPE_VULKAN: { + AVVulkanDeviceContext *vkctx = src_ctx->hwctx; + src_uuid = vkctx->device_uuid; + break; + } +#endif + default: + return AVERROR(ENOSYS); + } + + if (!src_uuid) { + av_log(device_ctx, AV_LOG_ERROR, + "Failed to get UUID of source device.\n"); + goto error; + } + + if (cuda_device_init(device_ctx) < 0) + goto error; + + cu = hwctx->internal->cuda_dl; + + ret = CHECK_CU(cu->cuInit(0)); + if (ret < 0) + goto error; + + ret = CHECK_CU(cu->cuDeviceGetCount(&device_count)); + if (ret < 0) + goto error; + + hwctx->internal->cuda_device = -1; + for (i = 0; i < device_count; i++) { + CUdevice dev; + CUuuid uuid; + + ret = CHECK_CU(cu->cuDeviceGet(&dev, i)); + if (ret < 0) + goto error; + + ret = CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev)); + if (ret < 0) + goto error; + + if (memcmp(src_uuid, uuid.bytes, sizeof (uuid.bytes)) == 0) { + hwctx->internal->cuda_device = dev; + break; + } + } + + if (hwctx->internal->cuda_device == -1) { + av_log(device_ctx, AV_LOG_ERROR, "Could not derive CUDA device.\n"); + goto error; + } + + hwctx->internal->flags = flags; + + if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) { + ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, &dev_flags, &dev_active)); + if (ret < 0) + goto error; + + if (dev_active && dev_flags != desired_flags) { + 
av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n"); + goto error; + } else if (dev_flags != desired_flags) { + ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, desired_flags)); + if (ret < 0) + goto error; + } + + ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device)); + if (ret < 0) + goto error; + } else { + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, hwctx->internal->cuda_device)); + if (ret < 0) + goto error; + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + } + + hwctx->internal->is_allocated = 1; + + // Setting stream to NULL will make functions automatically use the default CUstream + hwctx->stream = NULL; + + return 0; + +error: + cuda_device_uninit(device_ctx); + return AVERROR_UNKNOWN; +} + const HWContextType ff_hwcontext_type_cuda = { .type = AV_HWDEVICE_TYPE_CUDA, .name = "CUDA", @@ -397,6 +517,7 @@ const HWContextType ff_hwcontext_type_cuda = { .frames_priv_size = sizeof(CUDAFramesContext), .device_create = cuda_device_create, + .device_derive = cuda_device_derive, .device_init = cuda_device_init, .device_uninit = cuda_device_uninit, .frames_get_constraints = cuda_frames_get_constraints, diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h index 77dc47ddd6e64..dba0f39944ced 100644 --- a/libavutil/hwcontext_internal.h +++ b/libavutil/hwcontext_internal.h @@ -172,5 +172,6 @@ extern const HWContextType ff_hwcontext_type_vaapi; extern const HWContextType ff_hwcontext_type_vdpau; extern const HWContextType ff_hwcontext_type_videotoolbox; extern const HWContextType ff_hwcontext_type_mediacodec; +extern const HWContextType ff_hwcontext_type_vulkan; #endif /* AVUTIL_HWCONTEXT_INTERNAL_H */ diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c new file mode 100644 index 0000000000000..77c33cfda863e --- /dev/null +++ b/libavutil/hwcontext_vulkan.c @@ -0,0 +1,2803 @@ +/* + * This file is part of 
FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "pixdesc.h" +#include "avstring.h" +#include "imgutils.h" +#include "hwcontext.h" +#include "hwcontext_internal.h" +#include "hwcontext_vulkan.h" + +#if CONFIG_LIBDRM +#include +#include +#include +#include "hwcontext_drm.h" +#if CONFIG_VAAPI +#include +#include "hwcontext_vaapi.h" +#endif +#endif + +#if CONFIG_CUDA +#include "hwcontext_cuda_internal.h" +#include "cuda_check.h" +#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) +#endif + +typedef struct VulkanExecCtx { + VkCommandPool pool; + VkCommandBuffer buf; + VkQueue queue; + VkFence fence; +} VulkanExecCtx; + +typedef struct VulkanDevicePriv { + /* Properties */ + VkPhysicalDeviceProperties props; + VkPhysicalDeviceMemoryProperties mprops; + + /* Debug callback */ + VkDebugUtilsMessengerEXT debug_ctx; + + /* Image uploading */ + VulkanExecCtx cmd; + + /* Extensions */ + uint64_t extensions; + + /* Settings */ + int use_linear_images; +} VulkanDevicePriv; + +typedef struct AVVkFrameInternal { +#if CONFIG_CUDA + /* Importing external memory into cuda is really expensive so we keep the + * memory imported all the time */ + AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */ + CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS]; + 
CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS]; + CUarray cu_array[AV_NUM_DATA_POINTERS]; + CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS]; +#endif +} AVVkFrameInternal; + +#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \ + vkGetInstanceProcAddr(inst, #name) + +#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \ + VK_IMAGE_USAGE_STORAGE_BIT | \ + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \ + VK_IMAGE_USAGE_TRANSFER_DST_BIT) + +#define ADD_VAL_TO_LIST(list, count, val) \ + do { \ + list = av_realloc_array(list, sizeof(*list), ++count); \ + if (!list) { \ + err = AVERROR(ENOMEM); \ + goto end; \ + } \ + list[count - 1] = val; \ + } while(0) + +static const struct { + enum AVPixelFormat pixfmt; + const VkFormat vkfmts[3]; +} vk_pixfmt_map[] = { + { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } }, + { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } }, + + { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { AV_PIX_FMT_P010, { VK_FORMAT_R10X6_UNORM_PACK16, VK_FORMAT_R10X6G10X6_UNORM_2PACK16 } }, + { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { AV_PIX_FMT_NV16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + + { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + { AV_PIX_FMT_ABGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, + { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } }, + { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } }, + { AV_PIX_FMT_RGB24, { 
VK_FORMAT_R8G8B8_UNORM } }, + { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } }, + { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } }, + { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } }, + { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } }, + { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } }, + { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } }, + { AV_PIX_FMT_0BGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, + { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } }, + + { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, +}; + +const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p) +{ + for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++) + if (vk_pixfmt_map[i].pixfmt == p) + return vk_pixfmt_map[i].vkfmts; + return NULL; +} + +static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p, + int linear) +{ + const VkFormat *fmt = av_vkfmt_from_pixfmt(p); + int planes = av_pix_fmt_count_planes(p); + + if (!fmt) + return 0; + + for (int i = 0; i < planes; i++) { + VkFormatFeatureFlags flags; + VkFormatProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + }; + vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop); + flags = linear ? 
prop.formatProperties.linearTilingFeatures : + prop.formatProperties.optimalTilingFeatures; + if (!(flags & DEFAULT_USAGE_FLAGS)) + return 0; + } + + return 1; +} + +enum VulkanExtensions { + EXT_EXTERNAL_DMABUF_MEMORY = 1ULL << 0, /* VK_EXT_external_memory_dma_buf */ + EXT_DRM_MODIFIER_FLAGS = 1ULL << 1, /* VK_EXT_image_drm_format_modifier */ + EXT_EXTERNAL_FD_MEMORY = 1ULL << 2, /* VK_KHR_external_memory_fd */ + EXT_EXTERNAL_FD_SEM = 1ULL << 3, /* VK_KHR_external_semaphore_fd */ + + EXT_OPTIONAL = 1ULL << 62, + EXT_REQUIRED = 1ULL << 63, +}; + +typedef struct VulkanOptExtension { + const char *name; + uint64_t flag; +} VulkanOptExtension; + +static const VulkanOptExtension optional_instance_exts[] = { + /* For future use */ +}; + +static const VulkanOptExtension optional_device_exts[] = { + { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, EXT_EXTERNAL_FD_MEMORY, }, + { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, EXT_EXTERNAL_DMABUF_MEMORY, }, + { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, EXT_DRM_MODIFIER_FLAGS, }, + { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, EXT_EXTERNAL_FD_SEM, }, +}; + +/* Converts return values to strings */ +static const char *vk_ret2str(VkResult res) +{ +#define CASE(VAL) case VAL: return #VAL + switch (res) { + CASE(VK_SUCCESS); + CASE(VK_NOT_READY); + CASE(VK_TIMEOUT); + CASE(VK_EVENT_SET); + CASE(VK_EVENT_RESET); + CASE(VK_INCOMPLETE); + CASE(VK_ERROR_OUT_OF_HOST_MEMORY); + CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); + CASE(VK_ERROR_INITIALIZATION_FAILED); + CASE(VK_ERROR_DEVICE_LOST); + CASE(VK_ERROR_MEMORY_MAP_FAILED); + CASE(VK_ERROR_LAYER_NOT_PRESENT); + CASE(VK_ERROR_EXTENSION_NOT_PRESENT); + CASE(VK_ERROR_FEATURE_NOT_PRESENT); + CASE(VK_ERROR_INCOMPATIBLE_DRIVER); + CASE(VK_ERROR_TOO_MANY_OBJECTS); + CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); + CASE(VK_ERROR_FRAGMENTED_POOL); + CASE(VK_ERROR_SURFACE_LOST_KHR); + CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); + CASE(VK_SUBOPTIMAL_KHR); + CASE(VK_ERROR_OUT_OF_DATE_KHR); + 
CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); + CASE(VK_ERROR_VALIDATION_FAILED_EXT); + CASE(VK_ERROR_INVALID_SHADER_NV); + CASE(VK_ERROR_OUT_OF_POOL_MEMORY); + CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); + CASE(VK_ERROR_NOT_PERMITTED_EXT); + CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); + CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT); + CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT); + default: return "Unknown error"; + } +#undef CASE +} + +static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT *data, + void *priv) +{ + int l; + AVHWDeviceContext *ctx = priv; + + switch (severity) { + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l = AV_LOG_INFO; break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l = AV_LOG_ERROR; break; + default: l = AV_LOG_DEBUG; break; + } + + av_log(ctx, l, "%s\n", data->pMessage); + for (int i = 0; i < data->cmdBufLabelCount; i++) + av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName); + + return 0; +} + +static int check_extensions(AVHWDeviceContext *ctx, int dev, + const char * const **dst, uint32_t *num, int debug) +{ + const char *tstr; + const char **extension_names = NULL; + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + int err = 0, found, extensions_found = 0; + + const char *mod; + int optional_exts_num; + uint32_t sup_ext_count; + VkExtensionProperties *sup_ext; + const VulkanOptExtension *optional_exts; + + if (!dev) { + mod = "instance"; + optional_exts = optional_instance_exts; + optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts); + vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL); + sup_ext = av_malloc_array(sup_ext_count, 
sizeof(VkExtensionProperties)); + if (!sup_ext) + return AVERROR(ENOMEM); + vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext); + } else { + mod = "device"; + optional_exts = optional_device_exts; + optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts); + vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, + &sup_ext_count, NULL); + sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties)); + if (!sup_ext) + return AVERROR(ENOMEM); + vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, + &sup_ext_count, sup_ext); + } + + for (int i = 0; i < optional_exts_num; i++) { + int req = optional_exts[i].flag & EXT_REQUIRED; + tstr = optional_exts[i].name; + + found = 0; + for (int j = 0; j < sup_ext_count; j++) { + if (!strcmp(tstr, sup_ext[j].extensionName)) { + found = 1; + break; + } + } + if (!found) { + int lvl = req ? AV_LOG_ERROR : AV_LOG_VERBOSE; + av_log(ctx, lvl, "Extension \"%s\" not found!\n", tstr); + if (req) { + err = AVERROR(EINVAL); + goto end; + } + continue; + } + if (!req) + p->extensions |= optional_exts[i].flag; + + av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr); + + ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); + } + + if (debug && !dev) { + tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; + found = 0; + for (int j = 0; j < sup_ext_count; j++) { + if (!strcmp(tstr, sup_ext[j].extensionName)) { + found = 1; + break; + } + } + if (found) { + ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); + } else { + av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n", + tstr); + err = AVERROR(EINVAL); + goto end; + } + } + + *dst = extension_names; + *num = extensions_found; + +end: + av_free(sup_ext); + return err; +} + +/* Creates a VkInstance */ +static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) +{ + int err = 0; + VkResult ret; + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + AVDictionaryEntry 
*debug_opt = av_dict_get(opts, "debug", NULL, 0); + const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10); + VkApplicationInfo application_info = { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pEngineName = "libavutil", + .apiVersion = VK_API_VERSION_1_1, + .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, + LIBAVUTIL_VERSION_MINOR, + LIBAVUTIL_VERSION_MICRO), + }; + VkInstanceCreateInfo inst_props = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pApplicationInfo = &application_info, + }; + + /* Check for present/missing extensions */ + err = check_extensions(ctx, 0, &inst_props.ppEnabledExtensionNames, + &inst_props.enabledExtensionCount, debug_mode); + if (err < 0) + return err; + + if (debug_mode) { + static const char *layers[] = { "VK_LAYER_LUNARG_standard_validation" }; + inst_props.ppEnabledLayerNames = layers; + inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers); + } + + /* Try to create the instance */ + ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst); + + /* Free used memory */ + av_free((void *)inst_props.ppEnabledExtensionNames); + + /* Check for errors */ + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (debug_mode) { + VkDebugUtilsMessengerCreateInfoEXT dbg = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = vk_dbg_callback, + .pUserData = ctx, + }; + VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT); + + pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg, + 
hwctx->alloc, &p->debug_ctx); + } + + return 0; +} + +typedef struct VulkanDeviceSelection { + uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */ + int has_uuid; + const char *name; /* Will use this second unless NULL */ + uint32_t pci_device; /* Will use this second unless 0x0 */ + uint32_t vendor_id; /* Last resort to find something deterministic */ + int index; /* Finally fall back to index */ +} VulkanDeviceSelection; + +static const char *vk_dev_type(enum VkPhysicalDeviceType type) +{ + switch (type) { + case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated"; + case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: return "discrete"; + case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: return "virtual"; + case VK_PHYSICAL_DEVICE_TYPE_CPU: return "software"; + default: return "unknown"; + } +} + +/* Finds a device */ +static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) +{ + int err = 0, choice = -1; + uint32_t num; + VkResult ret; + VkPhysicalDevice *devices = NULL; + VkPhysicalDeviceIDProperties *idp = NULL; + VkPhysicalDeviceProperties2 *prop = NULL; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + + ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL); + if (ret != VK_SUCCESS || !num) { + av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret)); + return AVERROR(ENODEV); + } + + devices = av_malloc_array(num, sizeof(VkPhysicalDevice)); + if (!devices) + return AVERROR(ENOMEM); + + ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n", + vk_ret2str(ret)); + err = AVERROR(ENODEV); + goto end; + } + + prop = av_mallocz_array(num, sizeof(*prop)); + if (!prop) { + err = AVERROR(ENOMEM); + goto end; + } + + idp = av_mallocz_array(num, sizeof(*idp)); + if (!idp) { + err = AVERROR(ENOMEM); + goto end; + } + + av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n"); + for (int i = 0; i < num; i++) { + idp[i].sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; + prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + prop[i].pNext = &idp[i]; + + vkGetPhysicalDeviceProperties2(devices[i], &prop[i]); + av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i, + prop[i].properties.deviceName, + vk_dev_type(prop[i].properties.deviceType), + prop[i].properties.deviceID); + } + + if (select->has_uuid) { + for (int i = 0; i < num; i++) { + if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n"); + err = AVERROR(ENODEV); + goto end; + } else if (select->name) { + av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name); + for (int i = 0; i < num; i++) { + if (strstr(prop[i].properties.deviceName, select->name)) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n", + select->name); + err = AVERROR(ENODEV); + goto end; + } else if (select->pci_device) { + av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device); + for (int i = 0; i < num; i++) { + if (select->pci_device == prop[i].properties.deviceID) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n", + select->pci_device); + err = AVERROR(EINVAL); + goto end; + } else if (select->vendor_id) { + av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id); + for (int i = 0; i < num; i++) { + if (select->vendor_id == prop[i].properties.vendorID) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n", + select->vendor_id); + err = AVERROR(ENODEV); + goto end; + } else { + if (select->index < num) { + choice = select->index; + goto end; + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n", + select->index); + err = AVERROR(ENODEV); + goto end; + } + +end: + if (choice > -1) { + hwctx->phys_dev = 
devices[choice]; + memcpy(hwctx->device_uuid, idp[choice].deviceUUID, VK_UUID_SIZE); + } + av_free(devices); + av_free(prop); + av_free(idp); + + return err; +} + +static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) +{ + uint32_t num; + VkQueueFamilyProperties *qs = NULL; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + int graph_index = -1, comp_index = -1, tx_index = -1; + VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos; + + /* First get the number of queue families */ + vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL); + if (!num) { + av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); + return AVERROR_EXTERNAL; + } + + /* Then allocate memory */ + qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties)); + if (!qs) + return AVERROR(ENOMEM); + + /* Finally retrieve the queue families */ + vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs); + +#define SEARCH_FLAGS(expr, out) \ + for (int i = 0; i < num; i++) { \ + const VkQueueFlagBits flags = qs[i].queueFlags; \ + if (expr) { \ + out = i; \ + break; \ + } \ + } + + SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index) + + SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index), + comp_index) + + SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) && + (i != comp_index), tx_index) + +#undef SEARCH_FLAGS +#define QF_FLAGS(flags) \ + ((flags) & VK_QUEUE_GRAPHICS_BIT ) ? "(graphics) " : "", \ + ((flags) & VK_QUEUE_COMPUTE_BIT ) ? "(compute) " : "", \ + ((flags) & VK_QUEUE_TRANSFER_BIT ) ? "(transfer) " : "", \ + ((flags) & VK_QUEUE_SPARSE_BINDING_BIT) ? 
"(sparse) " : "" + + av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for graphics, " + "flags: %s%s%s%s\n", graph_index, QF_FLAGS(qs[graph_index].queueFlags)); + + hwctx->queue_family_index = graph_index; + hwctx->queue_family_tx_index = graph_index; + hwctx->queue_family_comp_index = graph_index; + + pc[cd->queueCreateInfoCount++].queueFamilyIndex = graph_index; + + if (comp_index != -1) { + av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for compute, " + "flags: %s%s%s%s\n", comp_index, QF_FLAGS(qs[comp_index].queueFlags)); + hwctx->queue_family_tx_index = comp_index; + hwctx->queue_family_comp_index = comp_index; + pc[cd->queueCreateInfoCount++].queueFamilyIndex = comp_index; + } + + if (tx_index != -1) { + av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for transfers, " + "flags: %s%s%s%s\n", tx_index, QF_FLAGS(qs[tx_index].queueFlags)); + hwctx->queue_family_tx_index = tx_index; + pc[cd->queueCreateInfoCount++].queueFamilyIndex = tx_index; + } + +#undef QF_FLAGS + + av_free(qs); + + return 0; +} + +static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd, + int queue_family_index) +{ + VkResult ret; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + + VkCommandPoolCreateInfo cqueue_create = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = queue_family_index, + }; + VkCommandBufferAllocateInfo cbuf_create = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }; + + VkFenceCreateInfo fence_spawn = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + }; + + ret = vkCreateFence(hwctx->act_dev, &fence_spawn, + hwctx->alloc, &cmd->fence); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create, + hwctx->alloc, &cmd->pool); + if (ret 
!= VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + cbuf_create.commandPool = cmd->pool; + + ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &cmd->buf); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + vkGetDeviceQueue(hwctx->act_dev, cqueue_create.queueFamilyIndex, 0, + &cmd->queue); + + return 0; +} + +static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd) +{ + AVVulkanDeviceContext *hwctx = ctx->hwctx; + + vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc); + + if (cmd->buf) + vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf); + if (cmd->pool) + vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc); +} + +static void vulkan_device_free(AVHWDeviceContext *ctx) +{ + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + + free_exec_ctx(ctx, &p->cmd); + + vkDestroyDevice(hwctx->act_dev, hwctx->alloc); + + if (p->debug_ctx) { + VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT); + pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx, + hwctx->alloc); + } + + vkDestroyInstance(hwctx->inst, hwctx->alloc); +} + +static int vulkan_device_create_internal(AVHWDeviceContext *ctx, + VulkanDeviceSelection *dev_select, + AVDictionary *opts, int flags) +{ + int err = 0; + VkResult ret; + AVDictionaryEntry *opt_d; + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VkDeviceQueueCreateInfo queue_create_info[3] = { + { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pQueuePriorities = (float []){ 1.0f }, + .queueCount = 1, }, + { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pQueuePriorities = (float []){ 1.0f }, + .queueCount = 1, }, + { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pQueuePriorities = (float []){ 1.0f }, + 
.queueCount = 1, }, + }; + + VkDeviceCreateInfo dev_info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pQueueCreateInfos = queue_create_info, + .queueCreateInfoCount = 0, + }; + + ctx->free = vulkan_device_free; + + /* Create an instance if not given one */ + if ((err = create_instance(ctx, opts))) + goto end; + + /* Find a device (if not given one) */ + if ((err = find_device(ctx, dev_select))) + goto end; + + vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props); + av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName); + av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n"); + av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyOffsetAlignment: %li\n", + p->props.limits.optimalBufferCopyOffsetAlignment); + av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %li\n", + p->props.limits.optimalBufferCopyRowPitchAlignment); + av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %li\n", + p->props.limits.minMemoryMapAlignment); + + /* Search queue family */ + if ((err = search_queue_families(ctx, &dev_info))) + goto end; + + if ((err = check_extensions(ctx, 1, &dev_info.ppEnabledExtensionNames, + &dev_info.enabledExtensionCount, 0))) + goto end; + + ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc, + &hwctx->act_dev); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto end; + } + + av_free((void *)dev_info.ppEnabledExtensionNames); + + /* Tiled images setting, use them by default */ + opt_d = av_dict_get(opts, "linear_images", NULL, 0); + if (opt_d) + p->use_linear_images = strtol(opt_d->value, NULL, 10); + +end: + return err; +} + +static int vulkan_device_init(AVHWDeviceContext *ctx) +{ + int err; + uint32_t queue_num; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *p = ctx->internal->priv; + + vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL); + if (!queue_num) { + av_log(ctx, AV_LOG_ERROR, 
"Failed to get queues!\n"); + return AVERROR_EXTERNAL; + } + + if (hwctx->queue_family_index >= queue_num || + hwctx->queue_family_tx_index >= queue_num || + hwctx->queue_family_comp_index >= queue_num) { + av_log(ctx, AV_LOG_ERROR, "Invalid queue index!\n"); + return AVERROR_EXTERNAL; + } + + /* Create exec context - if there's something invalid this will error out */ + err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index); + if (err) + return err; + + /* Get device capabilities */ + vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); + + return 0; +} + +static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device, + AVDictionary *opts, int flags) +{ + VulkanDeviceSelection dev_select = { 0 }; + if (device && device[0]) { + char *end = NULL; + dev_select.index = strtol(device, &end, 10); + if (end == device) { + dev_select.index = 0; + dev_select.name = device; + } + } + + return vulkan_device_create_internal(ctx, &dev_select, opts, flags); +} + +static int vulkan_device_derive(AVHWDeviceContext *ctx, + AVHWDeviceContext *src_ctx, int flags) +{ + av_unused VulkanDeviceSelection dev_select = { 0 }; + + /* If there's only one device on the system, then even if its not covered + * by the following checks (e.g. non-PCIe ARM GPU), having an empty + * dev_select will mean it'll get picked. 
*/ + switch(src_ctx->type) { +#if CONFIG_LIBDRM +#if CONFIG_VAAPI + case AV_HWDEVICE_TYPE_VAAPI: { + AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx; + + const char *vendor = vaQueryVendorString(src_hwctx->display); + if (!vendor) { + av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n"); + return AVERROR_EXTERNAL; + } + + if (strstr(vendor, "Intel")) + dev_select.vendor_id = 0x8086; + if (strstr(vendor, "AMD")) + dev_select.vendor_id = 0x1002; + + return vulkan_device_create_internal(ctx, &dev_select, NULL, flags); + } +#endif + case AV_HWDEVICE_TYPE_DRM: { + AVDRMDeviceContext *src_hwctx = src_ctx->hwctx; + + drmDevice *drm_dev_info; + int err = drmGetDevice(src_hwctx->fd, &drm_dev_info); + if (err) { + av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n"); + return AVERROR_EXTERNAL; + } + + if (drm_dev_info->bustype == DRM_BUS_PCI) + dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id; + + drmFreeDevice(&drm_dev_info); + + return vulkan_device_create_internal(ctx, &dev_select, NULL, flags); + } +#endif +#if CONFIG_CUDA + case AV_HWDEVICE_TYPE_CUDA: { + AVHWDeviceContext *cuda_cu = src_ctx; + AVCUDADeviceContext *src_hwctx = src_ctx->hwctx; + AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + + int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid, + cu_internal->cuda_device)); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n"); + return AVERROR_EXTERNAL; + } + + dev_select.has_uuid = 1; + + return vulkan_device_create_internal(ctx, &dev_select, NULL, flags); + } +#endif + default: + return AVERROR(ENOSYS); + } +} + +static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, + const void *hwconfig, + AVHWFramesConstraints *constraints) +{ + int count = 0; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *p = ctx->internal->priv; + + for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) + count += 
pixfmt_is_supported(hwctx, i, p->use_linear_images); + +#if CONFIG_CUDA + count++; +#endif + + constraints->valid_sw_formats = av_malloc_array(count + 1, + sizeof(enum AVPixelFormat)); + if (!constraints->valid_sw_formats) + return AVERROR(ENOMEM); + + count = 0; + for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) + if (pixfmt_is_supported(hwctx, i, p->use_linear_images)) + constraints->valid_sw_formats[count++] = i; + +#if CONFIG_CUDA + constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA; +#endif + constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE; + + constraints->min_width = 0; + constraints->min_height = 0; + constraints->max_width = p->props.limits.maxImageDimension2D; + constraints->max_height = p->props.limits.maxImageDimension2D; + + constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat)); + if (!constraints->valid_hw_formats) + return AVERROR(ENOMEM); + + constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN; + constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE; + + return 0; +} + +static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) +{ + VkResult ret; + int index = -1; + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *dev_hwctx = ctx->hwctx; + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = alloc_extension, + }; + + /* Align if we need to */ + if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment); + + alloc_info.allocationSize = req->size; + + /* The vulkan spec requires memory types to be sorted in the "optimal" + * order, so the first matching type we find will be the best/fastest one */ + for (int i = 0; i < p->mprops.memoryTypeCount; i++) { + /* The memory type must be supported by the requirements (bitfield) */ + if 
(!(req->memoryTypeBits & (1 << i))) + continue; + + /* The memory type flags must include our properties */ + if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) + continue; + + /* Found a suitable memory type */ + index = i; + break; + } + + if (index < 0) { + av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", + req_flags); + return AVERROR(EINVAL); + } + + alloc_info.memoryTypeIndex = index; + + ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info, + dev_hwctx->alloc, mem); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n", + vk_ret2str(ret)); + return AVERROR(ENOMEM); + } + + *mem_flags |= p->mprops.memoryTypes[index].propertyFlags; + + return 0; +} + +typedef struct VulkanFramesPriv { + VulkanExecCtx cmd; +} VulkanFramesPriv; + +static void vulkan_free_internal(AVVkFrameInternal *internal) +{ + if (!internal) + return; + +#if CONFIG_CUDA + if (internal->cuda_fc_ref) { + AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data; + int planes = av_pix_fmt_count_planes(cuda_fc->sw_format); + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + + for (int i = 0; i < planes; i++) { + if (internal->cu_sem[i]) + CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i])); + if (internal->cu_mma[i]) + CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i])); + if (internal->ext_mem[i]) + CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i])); + } + + av_buffer_unref(&internal->cuda_fc_ref); + } +#endif + + av_free(internal); +} + +static void vulkan_frame_free(void *opaque, uint8_t *data) +{ + AVVkFrame *f = (AVVkFrame *)data; + AVHWFramesContext *hwfc = opaque; + AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; + int planes = av_pix_fmt_count_planes(hwfc->sw_format); + + if (!f) + return; + + 
vulkan_free_internal(f->internal); + + for (int i = 0; i < planes; i++) { + vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); + } + + av_free(f); +} + +static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, + void *alloc_pnext, size_t alloc_pnext_stride) +{ + int err; + VkResult ret; + AVHWDeviceContext *ctx = hwfc->device_ctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } }; + + AVVulkanDeviceContext *hwctx = ctx->hwctx; + + for (int i = 0; i < planes; i++) { + int use_ded_mem; + VkImageMemoryRequirementsInfo2 req_desc = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, + .image = f->img[i], + }; + VkMemoryDedicatedAllocateInfo ded_alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride), + }; + VkMemoryDedicatedRequirements ded_req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, + }; + VkMemoryRequirements2 req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = &ded_req, + }; + + vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req); + + /* In case the implementation prefers/requires dedicated allocation */ + use_ded_mem = ded_req.prefersDedicatedAllocation | + ded_req.requiresDedicatedAllocation; + if (use_ded_mem) + ded_alloc.image = f->img[i]; + + /* Allocate memory */ + if ((err = alloc_mem(ctx, &req.memoryRequirements, + f->tiling == VK_IMAGE_TILING_LINEAR ? + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + use_ded_mem ? 
&ded_alloc : (void *)ded_alloc.pNext, + &f->flags, &f->mem[i]))) + return err; + + f->size[i] = req.memoryRequirements.size; + bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; + bind_info[i].image = f->img[i]; + bind_info[i].memory = f->mem[i]; + } + + /* Bind the allocated memory to the images */ + ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static int prepare_frame(AVHWFramesContext *hwfc, AVVkFrame *frame) +{ + VkResult ret; + AVHWDeviceContext *ctx = hwfc->device_ctx; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanFramesPriv *s = hwfc->internal->priv; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + + VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; + + VkCommandBufferBeginInfo cmd_start = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + + VkSubmitInfo s_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &s->cmd.buf, + + .pSignalSemaphores = frame->sem, + .signalSemaphoreCount = planes, + }; + + ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + /* Change the image layout to something more optimal for writes */ + for (int i = 0; i < planes; i++) { + img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + img_bar[i].srcAccessMask = 0x0; + img_bar[i].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + img_bar[i].oldLayout = frame->layout[i]; + img_bar[i].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + img_bar[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + img_bar[i].image = frame->img[i]; + img_bar[i].subresourceRange.levelCount = 1; + img_bar[i].subresourceRange.layerCount = 1; + 
img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + + frame->layout[i] = img_bar[i].newLayout; + frame->access[i] = img_bar[i].dstAccessMask; + } + + vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + 0, NULL, 0, NULL, planes, img_bar); + + ret = vkEndCommandBuffer(s->cmd.buf); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence); + if (ret != VK_SUCCESS) { + return AVERROR_EXTERNAL; + } else { + vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX); + vkResetFences(hwctx->act_dev, 1, &s->cmd.fence); + } + + return 0; +} + +static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, + VkImageTiling tiling, VkImageUsageFlagBits usage, + void *create_pnext) +{ + int err; + VkResult ret; + AVHWDeviceContext *ctx = hwfc->device_ctx; + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + enum AVPixelFormat format = hwfc->sw_format; + const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format); + const int planes = av_pix_fmt_count_planes(format); + + VkExportSemaphoreCreateInfo ext_sem_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, + .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + + VkSemaphoreCreateInfo sem_spawn = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL, + }; + + AVVkFrame *f = av_mallocz(sizeof(*f)); + if (!f) { + av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n"); + err = AVERROR(ENOMEM); + goto fail; + } + + /* Create the images */ + for (int i = 0; i < planes; i++) { + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); + int w = hwfc->width; + int h = hwfc->height; + const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w; + const int p_h = i > 0 ? 
AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+
+        VkImageCreateInfo image_create_info = {
+            .sType         = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+            .pNext         = create_pnext,
+            .imageType     = VK_IMAGE_TYPE_2D,
+            .format        = img_fmts[i],
+            .extent.width  = p_w,
+            .extent.height = p_h,
+            .extent.depth  = 1,
+            .mipLevels     = 1,
+            .arrayLayers   = 1,
+            .flags         = VK_IMAGE_CREATE_ALIAS_BIT,
+            .tiling        = tiling,
+            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+            .usage         = usage,
+            .sharingMode   = VK_SHARING_MODE_EXCLUSIVE,
+            .samples       = VK_SAMPLE_COUNT_1_BIT,
+        };
+
+        ret = vkCreateImage(hwctx->act_dev, &image_create_info,
+                            hwctx->alloc, &f->img[i]);
+        if (ret != VK_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
+                   vk_ret2str(ret));
+            err = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        /* Create semaphore */
+        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
+                                hwctx->alloc, &f->sem[i]);
+        if (ret != VK_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
+                   vk_ret2str(ret));
+            err = AVERROR_EXTERNAL; goto fail; /* don't leak images/semaphores from earlier iterations */
+        }
+
+        f->layout[i] = image_create_info.initialLayout;
+        f->access[i] = 0x0;
+    }
+
+    f->flags     = 0x0;
+    f->tiling    = tiling;
+
+    *frame = f;
+    return 0;
+
+fail:
+    vulkan_frame_free(hwfc, (uint8_t *)f);
+    return err;
+}
+
+/* Checks if an export flag is enabled, and if it is ORs it with *iexp */
+static void try_export_flags(AVHWFramesContext *hwfc,
+                             VkExternalMemoryHandleTypeFlags *comp_handle_types,
+                             VkExternalMemoryHandleTypeFlagBits *iexp,
+                             VkExternalMemoryHandleTypeFlagBits exp)
+{
+    VkResult ret;
+    AVVulkanFramesContext *hwctx = hwfc->hwctx;
+    AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
+    VkExternalImageFormatProperties eprops = {
+        .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
+    };
+    VkImageFormatProperties2 props = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
+        .pNext = &eprops,
+    };
+    VkPhysicalDeviceExternalImageFormatInfo enext = {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
.handleType = exp, + }; + VkPhysicalDeviceImageFormatInfo2 pinfo = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .pNext = !exp ? NULL : &enext, + .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0], + .type = VK_IMAGE_TYPE_2D, + .tiling = hwctx->tiling, + .usage = hwctx->usage, + .flags = VK_IMAGE_CREATE_ALIAS_BIT, + }; + + ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev, + &pinfo, &props); + if (ret == VK_SUCCESS) { + *iexp |= exp; + *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes; + } +} + +static AVBufferRef *vulkan_pool_alloc(void *opaque, int size) +{ + int err; + AVVkFrame *f; + AVBufferRef *avbuf = NULL; + AVHWFramesContext *hwfc = opaque; + AVVulkanFramesContext *hwctx = hwfc->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS]; + VkExternalMemoryHandleTypeFlags e = 0x0; + + VkExternalMemoryImageCreateInfo eiinfo = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = hwctx->create_pnext, + }; + + if (p->extensions & EXT_EXTERNAL_FD_MEMORY) + try_export_flags(hwfc, &eiinfo.handleTypes, &e, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT); + + if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY) + try_export_flags(hwfc, &eiinfo.handleTypes, &e, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + + for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) { + eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + eminfo[i].pNext = hwctx->alloc_pnext[i]; + eminfo[i].handleTypes = e; + } + + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, + eiinfo.handleTypes ? 
&eiinfo : NULL); + if (err) + goto fail; + + err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo)); + if (err) + goto fail; + + err = prepare_frame(hwfc, f); + if (err) + goto fail; + + avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame), + vulkan_frame_free, hwfc, 0); + if (!avbuf) + goto fail; + + return avbuf; + +fail: + vulkan_frame_free(hwfc, (uint8_t *)f); + return NULL; +} + +static void vulkan_frames_uninit(AVHWFramesContext *hwfc) +{ + VulkanFramesPriv *fp = hwfc->internal->priv; + + free_exec_ctx(hwfc->device_ctx, &fp->cmd); +} + +static int vulkan_frames_init(AVHWFramesContext *hwfc) +{ + int err; + AVVkFrame *f; + AVVulkanFramesContext *hwctx = hwfc->hwctx; + VulkanFramesPriv *fp = hwfc->internal->priv; + AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + + if (hwfc->pool) + return 0; + + /* Default pool flags */ + hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + + hwctx->usage |= DEFAULT_USAGE_FLAGS; + + err = create_exec_ctx(hwfc->device_ctx, &fp->cmd, + dev_hwctx->queue_family_tx_index); + if (err) + return err; + + /* Test to see if allocation will fail */ + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, + hwctx->create_pnext); + if (err) + return err; + + vulkan_frame_free(hwfc, (uint8_t *)f); + + hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame), + hwfc, vulkan_pool_alloc, + NULL); + if (!hwfc->internal->pool_internal) + return AVERROR(ENOMEM); + + return 0; +} + +static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame) +{ + frame->buf[0] = av_buffer_pool_get(hwfc->pool); + if (!frame->buf[0]) + return AVERROR(ENOMEM); + + frame->data[0] = frame->buf[0]->data; + frame->format = AV_PIX_FMT_VULKAN; + frame->width = hwfc->width; + frame->height = hwfc->height; + + return 0; +} + +static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc, + enum 
AVHWFrameTransferDirection dir,
+                                       enum AVPixelFormat **formats)
+{
+    enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
+    if (!fmts)
+        return AVERROR(ENOMEM);
+
+    fmts[0] = hwfc->sw_format;
+    fmts[1] = AV_PIX_FMT_NONE;
+
+    *formats = fmts;
+    return 0;
+}
+
+typedef struct VulkanMapping {
+    AVVkFrame *frame;
+    int flags;
+} VulkanMapping;
+
+static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
+{
+    VulkanMapping *map = hwmap->priv;
+    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
+
+    /* Check if buffer needs flushing */
+    if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
+        !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+        VkResult ret;
+        VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
+
+        for (int i = 0; i < planes; i++) {
+            flush_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+            flush_ranges[i].memory = map->frame->mem[i];
+            flush_ranges[i].size   = VK_WHOLE_SIZE;
+        }
+
+        ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes,
+                                        flush_ranges);
+        if (ret != VK_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+                   vk_ret2str(ret));
+        }
+    }
+
+    for (int i = 0; i < planes; i++)
+        vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
+
+    av_free(map);
+}
+
+static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
+                                   const AVFrame *src, int flags)
+{
+    VkResult ret;
+    int err, mapped_mem_count = 0;
+    AVVkFrame *f = (AVVkFrame *)src->data[0];
+    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
+
+    VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
+    if (!map)
+        return AVERROR(ENOMEM); /* allocation failure is ENOMEM, not EINVAL */
+
+    if (src->format != AV_PIX_FMT_VULKAN) {
+        av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
+               av_get_pix_fmt_name(src->format));
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    if (!(f->flags &
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) || + !(f->tiling == VK_IMAGE_TILING_LINEAR)) { + av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible " + "and linear!\n"); + err = AVERROR(EINVAL); + goto fail; + } + + dst->width = src->width; + dst->height = src->height; + + for (int i = 0; i < planes; i++) { + ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0, + VK_WHOLE_SIZE, 0, (void **)&dst->data[i]); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + mapped_mem_count++; + } + + /* Check if the memory contents matter */ + if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) && + !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } }; + for (int i = 0; i < planes; i++) { + map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + map_mem_ranges[i].size = VK_WHOLE_SIZE; + map_mem_ranges[i].memory = f->mem[i]; + } + + ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes, + map_mem_ranges); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + } + + for (int i = 0; i < planes; i++) { + VkImageSubresource sub = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + }; + VkSubresourceLayout layout; + vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); + dst->linesize[i] = layout.rowPitch; + } + + map->frame = f; + map->flags = flags; + + err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, + &vulkan_unmap_frame, map); + if (err < 0) + goto fail; + + return 0; + +fail: + for (int i = 0; i < mapped_mem_count; i++) + vkUnmapMemory(hwctx->act_dev, f->mem[i]); + + av_free(map); + return err; +} + +#if CONFIG_LIBDRM +static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) +{ + VulkanMapping *map = hwmap->priv; + 
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + + for (int i = 0; i < planes; i++) { + vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc); + vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc); + vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc); + } + + av_freep(&map->frame); +} + +static const struct { + uint32_t va_fourcc; + VkFormat vk_format; +} vulkan_drm_format_map[] = { + { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM }, + { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM }, + { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM }, + { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM }, + { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM }, + { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM }, + { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM }, + { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM }, + { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM }, + { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM }, +}; + +static inline VkFormat drm_to_vulkan_fmt(uint32_t va_fourcc) +{ + for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++) + if (vulkan_drm_format_map[i].va_fourcc == va_fourcc) + return vulkan_drm_format_map[i].vk_format; + return VK_FORMAT_UNDEFINED; +} + +static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame, + AVDRMFrameDescriptor *desc) +{ + int err = 0; + VkResult ret; + AVVkFrame *f; + AVHWDeviceContext *ctx = hwfc->device_ctx; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *p = ctx->internal->priv; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format); + const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS; + VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS]; + VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS]; + VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + + VK_LOAD_PFN(hwctx->inst, 
vkGetMemoryFdPropertiesKHR);
+
+    for (int i = 0; i < desc->nb_layers; i++) {
+        if (desc->layers[i].nb_planes > 1) {
+            av_log(ctx, AV_LOG_ERROR, "Cannot import DMABUFS with more than 1 "
+                                      "plane per layer!\n");
+            return AVERROR(EINVAL);
+        }
+
+        if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
+            av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format!\n");
+            return AVERROR(EINVAL);
+        }
+    }
+
+    if (!(f = av_mallocz(sizeof(*f)))) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (int i = 0; i < desc->nb_objects; i++) {
+        VkMemoryFdPropertiesKHR fdmp = {
+            .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
+        };
+        VkMemoryRequirements req = {
+            .size = desc->objects[i].size,
+        };
+        VkImportMemoryFdInfoKHR idesc = {
+            .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
+            .handleType = htype,
+            .fd         = desc->objects[i].fd,
+        };
+
+        ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype,
+                                             desc->objects[i].fd, &fdmp);
+        if (ret != VK_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
+                   vk_ret2str(ret));
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
+
+        req.memoryTypeBits = fdmp.memoryTypeBits;
+
+        err = alloc_mem(ctx, &req, 0x0, &idesc, &f->flags, &f->mem[i]);
+        if (err)
+            goto fail; /* don't leak f and memory imported in earlier iterations */
+
+        f->size[i] = desc->objects[i].size;
+    }
+
+    f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
+                desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
+ VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + + for (int i = 0; i < desc->nb_layers; i++) { + VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT, + .drmFormatModifier = desc->objects[0].format_modifier, + .drmFormatModifierPlaneCount = desc->layers[i].nb_planes, + .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data, + }; + + VkExternalMemoryImageCreateInfo einfo = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = has_modifiers ? &drm_info : NULL, + .handleTypes = htype, + }; + + VkSemaphoreCreateInfo sem_spawn = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + }; + + const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width; + const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height; + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = &einfo, + .imageType = VK_IMAGE_TYPE_2D, + .format = drm_to_vulkan_fmt(desc->layers[i].format), + .extent.width = p_w, + .extent.height = p_h, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .flags = VK_IMAGE_CREATE_ALIAS_BIT, + .tiling = f->tiling, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */ + .usage = DEFAULT_USAGE_FLAGS, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .samples = VK_SAMPLE_COUNT_1_BIT, + }; + + for (int j = 0; j < desc->layers[i].nb_planes; j++) { + plane_data[j].offset = desc->layers[i].planes[j].offset; + plane_data[j].rowPitch = desc->layers[i].planes[j].pitch; + plane_data[j].size = 0; /* The specs say so for all 3 */ + plane_data[j].arrayPitch = 0; + plane_data[j].depthPitch = 0; + } + + /* Create image */ + ret = vkCreateImage(hwctx->act_dev, &image_create_info, + hwctx->alloc, &f->img[i]); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", + vk_ret2str(ret)); + err = AVERROR(EINVAL); + goto 
fail; + } + + ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem[i]); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + /* We'd import a semaphore onto the one we created using + * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI + * offer us anything we could import and sync with, so instead + * leave the semaphore unsignalled and enjoy the validation spam. */ + + f->layout[i] = image_create_info.initialLayout; + f->access[i] = 0x0; + + /* TODO: Fix to support more than 1 plane per layer */ + bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; + bind_info[i].pNext = NULL; + bind_info[i].image = f->img[i]; + bind_info[i].memory = f->mem[desc->layers[i].planes[0].object_index]; + bind_info[i].memoryOffset = desc->layers[i].planes[0].offset; + } + + /* Bind the allocated memory to the images */ + ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + *frame = f; + + return 0; + +fail: + for (int i = 0; i < planes; i++) { + vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); + } + + av_free(f); + + return err; +} + +static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + int err = 0; + AVVkFrame *f; + VulkanMapping *map = NULL; + + err = vulkan_map_from_drm_frame_desc(hwfc, &f, + (AVDRMFrameDescriptor *)src->data[0]); + if (err) + goto fail; + + /* The unmapping function will free this */ + dst->data[0] = (uint8_t *)f; + dst->width = src->width; + dst->height = src->height; + + map = av_mallocz(sizeof(VulkanMapping)); + if (!map) + goto fail; + + map->frame = f; + map->flags = flags; + + err = 
ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, + &vulkan_unmap_from, map); + if (err < 0) + goto fail; + + av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n"); + + return 0; + +fail: + vulkan_frame_free(hwfc->device_ctx->hwctx, (uint8_t *)f); + av_free(map); + return err; +} + +#if CONFIG_VAAPI +static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc, + AVFrame *dst, const AVFrame *src, + int flags) +{ + int err; + AVFrame *tmp = av_frame_alloc(); + AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data; + AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx; + VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3]; + + if (!tmp) + return AVERROR(ENOMEM); + + /* We have to sync since like the previous comment said, no semaphores */ + vaSyncSurface(vaapi_ctx->display, surface_id); + + tmp->format = AV_PIX_FMT_DRM_PRIME; + + err = av_hwframe_map(tmp, src, flags); + if (err < 0) + goto fail; + + err = vulkan_map_from_drm(dst_fc, dst, tmp, flags); + if (err < 0) + goto fail; + + err = ff_hwframe_map_replace(dst, src); + +fail: + av_frame_free(&tmp); + return err; +} +#endif +#endif + +#if CONFIG_CUDA +static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, + AVBufferRef *cuda_hwfc, + const AVFrame *frame) +{ + int err; + VkResult ret; + AVVkFrame *dst_f; + AVVkFrameInternal *dst_int; + AVHWDeviceContext *ctx = hwfc->device_ctx; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR); + VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR); + + AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data; + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + CUarray_format cufmt = 
desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 : + CU_AD_FORMAT_UNSIGNED_INT8; + + dst_f = (AVVkFrame *)frame->data[0]; + + dst_int = dst_f->internal; + if (!dst_int || !dst_int->cuda_fc_ref) { + if (!dst_f->internal) + dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal)); + + if (!dst_int) { + err = AVERROR(ENOMEM); + goto fail; + } + + dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc); + if (!dst_int->cuda_fc_ref) { + err = AVERROR(ENOMEM); + goto fail; + } + + for (int i = 0; i < planes; i++) { + CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = { + .offset = 0, + .arrayDesc = { + .Width = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w) + : hwfc->width, + .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h) + : hwfc->height, + .Depth = 0, + .Format = cufmt, + .NumChannels = 1 + ((planes == 2) && i), + .Flags = 0, + }, + .numLevels = 1, + }; + CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = { + .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, + .size = dst_f->size[i], + }; + VkMemoryGetFdInfoKHR export_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .memory = dst_f->mem[i], + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + }; + VkSemaphoreGetFdInfoKHR sem_export = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .semaphore = dst_f->sem[i], + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { + .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, + }; + + ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info, + &ext_desc.handle.fd); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n"); + err = AVERROR_EXTERNAL; + goto fail; + } + + ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i], + 
dst_int->ext_mem[i], + &tex_desc)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i], + dst_int->cu_mma[i], 0)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export, + &ext_sem_desc.handle.fd); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i], + &ext_sem_desc)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + } + } + return 0; + +fail: + return -err; +} + +static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, + AVFrame *dst, const AVFrame *src) +{ + int err; + VkResult ret; + CUcontext dummy; + AVVkFrame *dst_f; + AVVkFrameInternal *dst_int; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + + AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data; + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; + CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; + + ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + dst_f = (AVVkFrame *)dst->data[0]; + + ret = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst); + if (ret < 0) { + goto fail; + } + dst_int = dst_f->internal; + + ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, + planes, cuda_dev->stream)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + for (int i = 0; i < planes; i++) { + CUDA_MEMCPY2D cpy = { + 
.srcMemoryType = CU_MEMORYTYPE_DEVICE, + .srcDevice = (CUdeviceptr)src->data[i], + .srcPitch = src->linesize[i], + .srcY = 0, + + .dstMemoryType = CU_MEMORYTYPE_ARRAY, + .dstArray = dst_int->cu_array[i], + .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w) + : hwfc->width) * desc->comp[i].step, + .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h) + : hwfc->height, + }; + + ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + } + + ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, + planes, cuda_dev->stream)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + + av_log(hwfc, AV_LOG_VERBOSE, "Transfered CUDA image to Vulkan!\n"); + + return 0; + +fail: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + vulkan_free_internal(dst_int); + dst_f->internal = NULL; + av_buffer_unref(&dst->buf[0]); + return err; +} +#endif + +static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + + switch (src->format) { +#if CONFIG_LIBDRM +#if CONFIG_VAAPI + case AV_PIX_FMT_VAAPI: + if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY) + return vulkan_map_from_vaapi(hwfc, dst, src, flags); +#endif + case AV_PIX_FMT_DRM_PRIME: + if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY) + return vulkan_map_from_drm(hwfc, dst, src, flags); +#endif + default: + return AVERROR(ENOSYS); + } +} + +#if CONFIG_LIBDRM +typedef struct VulkanDRMMapping { + AVDRMFrameDescriptor drm_desc; + AVVkFrame *source; +} VulkanDRMMapping; + +static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) +{ + AVDRMFrameDescriptor *drm_desc = hwmap->priv; + + for (int i = 0; i < drm_desc->nb_objects; i++) + close(drm_desc->objects[i].fd); + + av_free(drm_desc); +} + +static inline uint32_t vulkan_fmt_to_drm(VkFormat 
vkfmt) +{ + for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++) + if (vulkan_drm_format_map[i].vk_format == vkfmt) + return vulkan_drm_format_map[i].va_fourcc; + return DRM_FORMAT_INVALID; +} + +static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + int err = 0; + VkResult ret; + AVVkFrame *f = (AVVkFrame *)src->data[0]; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR); + VkImageDrmFormatModifierPropertiesEXT drm_mod = { + .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT, + }; + + AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc)); + if (!drm_desc) + return AVERROR(ENOMEM); + + err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc); + if (err < 0) + goto end; + + if (p->extensions & EXT_DRM_MODIFIER_FLAGS) { + VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT); + ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0], + &drm_mod); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n"); + err = AVERROR_EXTERNAL; + goto end; + } + } + + for (int i = 0; (i < planes) && (f->mem[i]); i++) { + VkMemoryGetFdInfoKHR export_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .memory = f->mem[i], + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + + ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info, + &drm_desc->objects[i].fd); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n"); + err = AVERROR_EXTERNAL; + goto end; + } + + drm_desc->nb_objects++; + drm_desc->objects[i].size = f->size[i]; + drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier; + } + + drm_desc->nb_layers = planes; + for (int i = 0; i 
< drm_desc->nb_layers; i++) { + VkSubresourceLayout layout; + VkImageSubresource sub = { + .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ? + VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT : + VK_IMAGE_ASPECT_COLOR_BIT, + }; + VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i]; + + drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt); + drm_desc->layers[i].nb_planes = 1; + + if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) { + av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n"); + err = AVERROR_PATCHWELCOME; + goto end; + } + + drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1); + + if (f->tiling != VK_IMAGE_TILING_OPTIMAL) + continue; + + vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); + drm_desc->layers[i].planes[0].offset = layout.offset; + drm_desc->layers[i].planes[0].pitch = layout.rowPitch; + } + + dst->width = src->width; + dst->height = src->height; + dst->data[0] = (uint8_t *)drm_desc; + + av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n"); + + return 0; + +end: + av_free(drm_desc); + return err; +} + +#if CONFIG_VAAPI +static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + int err; + AVFrame *tmp = av_frame_alloc(); + if (!tmp) + return AVERROR(ENOMEM); + + tmp->format = AV_PIX_FMT_DRM_PRIME; + + err = vulkan_map_to_drm(hwfc, tmp, src, flags); + if (err < 0) + goto fail; + + err = av_hwframe_map(dst, tmp, flags); + if (err < 0) + goto fail; + + err = ff_hwframe_map_replace(dst, src); + +fail: + av_frame_free(&tmp); + return err; +} +#endif +#endif + +static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + + switch (dst->format) { +#if CONFIG_LIBDRM + case AV_PIX_FMT_DRM_PRIME: + if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY) + return vulkan_map_to_drm(hwfc, dst, src, flags); +#if 
CONFIG_VAAPI + case AV_PIX_FMT_VAAPI: + if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY) + return vulkan_map_to_vaapi(hwfc, dst, src, flags); +#endif +#endif + default: + return vulkan_map_frame_to_mem(hwfc, dst, src, flags); + } +} + +typedef struct ImageBuffer { + VkBuffer buf; + VkDeviceMemory mem; + VkMemoryPropertyFlagBits flags; +} ImageBuffer; + +static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf) +{ + AVVulkanDeviceContext *hwctx = ctx->hwctx; + if (!buf) + return; + + vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc); + vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc); +} + +static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, int height, + int *stride, VkBufferUsageFlags usage, + VkMemoryPropertyFlagBits flags, void *create_pnext, + void *alloc_pnext) +{ + int err; + VkResult ret; + VkMemoryRequirements req; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *p = ctx->internal->priv; + + VkBufferCreateInfo buf_spawn = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = create_pnext, + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment); + buf_spawn.size = height*(*stride); + + ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &buf->buf); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req); + + err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem); + if (err) + return err; + + ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", + vk_ret2str(ret)); + free_buf(ctx, buf); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[], + int nb_buffers, int 
invalidate) +{ + VkResult ret; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS]; + int invalidate_count = 0; + + for (int i = 0; i < nb_buffers; i++) { + ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0, + VK_WHOLE_SIZE, 0, (void **)&mem[i]); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + if (!invalidate) + return 0; + + for (int i = 0; i < nb_buffers; i++) { + const VkMappedMemoryRange ival_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = buf[i].mem, + .size = VK_WHOLE_SIZE, + }; + if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + continue; + invalidate_ctx[invalidate_count++] = ival_buf; + } + + if (invalidate_count) { + ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count, + invalidate_ctx); + if (ret != VK_SUCCESS) + av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n", + vk_ret2str(ret)); + } + + return 0; +} + +static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, + int nb_buffers, int flush) +{ + int err = 0; + VkResult ret; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS]; + int flush_count = 0; + + if (flush) { + for (int i = 0; i < nb_buffers; i++) { + const VkMappedMemoryRange flush_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = buf[i].mem, + .size = VK_WHOLE_SIZE, + }; + if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + continue; + flush_ctx[flush_count++] = flush_buf; + } + } + + if (flush_count) { + ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ + } + } + + for (int i = 0; i < nb_buffers; i++) + vkUnmapMemory(hwctx->act_dev, buf[i].mem); + + return 
err; +} + +static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame, + ImageBuffer *buffer, const int *buf_stride, int w, + int h, enum AVPixelFormat pix_fmt, int to_buf) +{ + VkResult ret; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *s = ctx->internal->priv; + VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS]; + + const int planes = av_pix_fmt_count_planes(pix_fmt); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + + VkCommandBufferBeginInfo cmd_start = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + + VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; + + VkSubmitInfo s_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &s->cmd.buf, + .pSignalSemaphores = frame->sem, + .pWaitSemaphores = frame->sem, + .pWaitDstStageMask = sem_wait_dst, + .signalSemaphoreCount = planes, + .waitSemaphoreCount = planes, + }; + + ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + /* Change the image layout to something more optimal for transfers */ + for (int i = 0; i < planes; i++) { + img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + img_bar[i].srcAccessMask = 0x0; + img_bar[i].dstAccessMask = to_buf ? VK_ACCESS_TRANSFER_READ_BIT : + VK_ACCESS_TRANSFER_WRITE_BIT; + img_bar[i].oldLayout = frame->layout[i]; + img_bar[i].newLayout = to_buf ? 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + img_bar[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + img_bar[i].image = frame->img[i]; + img_bar[i].subresourceRange.levelCount = 1; + img_bar[i].subresourceRange.layerCount = 1; + img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + + sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + + frame->layout[i] = img_bar[i].newLayout; + frame->access[i] = img_bar[i].dstAccessMask; + } + + vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + 0, NULL, 0, NULL, planes, img_bar); + + /* Schedule a copy for each plane */ + for (int i = 0; i < planes; i++) { + const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w; + const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h; + VkBufferImageCopy buf_reg = { + .bufferOffset = 0, + /* Buffer stride isn't in bytes, it's in samples, the implementation + * uses the image's VkFormat to know how many bytes per sample + * the buffer has. So we have to convert by dividing. Stupid. + * Won't work with YUVA or other planar formats with alpha. 
*/ + .bufferRowLength = buf_stride[i] / desc->comp[i].step, + .bufferImageHeight = p_h, + .imageSubresource.layerCount = 1, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageOffset = { 0, 0, 0, }, + .imageExtent = { p_w, p_h, 1, }, + }; + + if (to_buf) + vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i], + buffer[i].buf, 1, &buf_reg); + else + vkCmdCopyBufferToImage(s->cmd.buf, buffer[i].buf, frame->img[i], + frame->layout[i], 1, &buf_reg); + } + + ret = vkEndCommandBuffer(s->cmd.buf); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + /* Wait for the download/upload to finish if uploading, otherwise the + * semaphore will take care of synchronization when uploading */ + ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } else { + vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX); + vkResetFences(hwctx->act_dev, 1, &s->cmd.fence); + } + + return 0; +} + +/* Technically we can use VK_EXT_external_memory_host to upload and download, + * however the alignment requirements make this unfeasible as both the pointer + * and the size of each plane need to be aligned to the minimum alignment + * requirement, which on all current implementations (anv, radv) is 4096. + * If the requirement gets relaxed (unlikely) this can easily be implemented. 
*/ +static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + int err = 0; + AVFrame tmp; + AVVkFrame *f = (AVVkFrame *)dst->data[0]; + AVHWDeviceContext *dev_ctx = hwfc->device_ctx; + ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } }; + const int planes = av_pix_fmt_count_planes(src->format); + int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h; + + if ((src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format))) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n"); + return AVERROR(EINVAL); + } + + if (src->width > hwfc->width || src->height > hwfc->height) + return AVERROR(EINVAL); + + /* For linear, host visiable images */ + if (f->tiling == VK_IMAGE_TILING_LINEAR && + f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + AVFrame *map = av_frame_alloc(); + if (!map) + return AVERROR(ENOMEM); + map->format = src->format; + + err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE); + if (err) + goto end; + + err = av_frame_copy(map, src); + av_frame_free(&map); + goto end; + } + + /* Create buffers */ + for (int i = 0; i < planes; i++) { + int h = src->height; + int p_height = i > 0 ? 
AV_CEIL_RSHIFT(h, log2_chroma) : h; + + tmp.linesize[i] = src->linesize[i]; + err = create_buf(dev_ctx, &buf[i], p_height, + &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL); + if (err) + goto end; + } + + /* Map, copy image to buffer, unmap */ + if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 0))) + goto end; + + av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data, + src->linesize, src->format, src->width, src->height); + + if ((err = unmap_buffers(dev_ctx, buf, planes, 1))) + goto end; + + /* Copy buffers to image */ + err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize, + src->width, src->height, src->format, 0); + +end: + for (int i = 0; i < planes; i++) + free_buf(dev_ctx, &buf[i]); + + return err; +} + +static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + + switch (src->format) { +#if CONFIG_CUDA + case AV_PIX_FMT_CUDA: + if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) && + (p->extensions & EXT_EXTERNAL_FD_SEM)) + return vulkan_transfer_data_from_cuda(hwfc, dst, src); +#endif + default: + if (src->hw_frames_ctx) + return AVERROR(ENOSYS); + else + return vulkan_transfer_data_from_mem(hwfc, dst, src); + } +} + +#if CONFIG_CUDA +static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + int err; + VkResult ret; + CUcontext dummy; + AVVkFrame *dst_f; + AVVkFrameInternal *dst_int; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + + AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data; + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + + ret = 
CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + dst_f = (AVVkFrame *)src->data[0]; + + err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src); + if (err < 0) { + goto fail; + } + + dst_int = dst_f->internal; + + for (int i = 0; i < planes; i++) { + CUDA_MEMCPY2D cpy = { + .dstMemoryType = CU_MEMORYTYPE_DEVICE, + .dstDevice = (CUdeviceptr)dst->data[i], + .dstPitch = dst->linesize[i], + .dstY = 0, + + .srcMemoryType = CU_MEMORYTYPE_ARRAY, + .srcArray = dst_int->cu_array[i], + .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w) + : hwfc->width) * desc->comp[i].step, + .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h) + : hwfc->height, + }; + + ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + } + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + + av_log(hwfc, AV_LOG_VERBOSE, "Transfered Vulkan image to CUDA!\n"); + + return 0; + +fail: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + vulkan_free_internal(dst_int); + dst_f->internal = NULL; + av_buffer_unref(&dst->buf[0]); + return err; +} +#endif + +static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + int err = 0; + AVFrame tmp; + AVVkFrame *f = (AVVkFrame *)src->data[0]; + AVHWDeviceContext *dev_ctx = hwfc->device_ctx; + ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } }; + const int planes = av_pix_fmt_count_planes(dst->format); + int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h; + + if (dst->width > hwfc->width || dst->height > hwfc->height) + return AVERROR(EINVAL); + + /* For linear, host visiable images */ + if (f->tiling == VK_IMAGE_TILING_LINEAR && + f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + AVFrame *map = av_frame_alloc(); + if (!map) + return AVERROR(ENOMEM); + map->format = dst->format; + + err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ); + 
if (err) + return err; + + err = av_frame_copy(dst, map); + av_frame_free(&map); + return err; + } + + /* Create buffers */ + for (int i = 0; i < planes; i++) { + int h = dst->height; + int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h; + + tmp.linesize[i] = dst->linesize[i]; + err = create_buf(dev_ctx, &buf[i], p_height, + &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL); + } + + /* Copy image to buffer */ + if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize, + dst->width, dst->height, dst->format, 1))) + goto end; + + /* Map, copy buffer to frame, unmap */ + if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1))) + goto end; + + av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data, + tmp.linesize, dst->format, dst->width, dst->height); + + err = unmap_buffers(dev_ctx, buf, planes, 0); + +end: + for (int i = 0; i < planes; i++) + free_buf(dev_ctx, &buf[i]); + + return err; +} + +static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + + switch (dst->format) { +#if CONFIG_CUDA + case AV_PIX_FMT_CUDA: + if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) && + (p->extensions & EXT_EXTERNAL_FD_SEM)) + return vulkan_transfer_data_to_cuda(hwfc, dst, src); +#endif + default: + if (dst->hw_frames_ctx) + return AVERROR(ENOSYS); + else + return vulkan_transfer_data_to_mem(hwfc, dst, src); + } +} + +const HWContextType ff_hwcontext_type_vulkan = { + .type = AV_HWDEVICE_TYPE_VULKAN, + .name = "Vulkan", + + .device_hwctx_size = sizeof(AVVulkanDeviceContext), + .device_priv_size = sizeof(VulkanDevicePriv), + .frames_hwctx_size = sizeof(AVVulkanFramesContext), + .frames_priv_size = sizeof(VulkanFramesPriv), + + .device_init = &vulkan_device_init, + .device_create = &vulkan_device_create, + .device_derive = &vulkan_device_derive, + + .frames_get_constraints = 
&vulkan_frames_get_constraints, + .frames_init = vulkan_frames_init, + .frames_get_buffer = vulkan_get_buffer, + .frames_uninit = vulkan_frames_uninit, + + .transfer_get_formats = vulkan_transfer_get_formats, + .transfer_data_to = vulkan_transfer_data_to, + .transfer_data_from = vulkan_transfer_data_from, + + .map_to = vulkan_map_to, + .map_from = vulkan_map_from, + + .pix_fmts = (const enum AVPixelFormat []) { + AV_PIX_FMT_VULKAN, + AV_PIX_FMT_NONE + }, +}; diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h new file mode 100644 index 0000000000000..9067a6f624133 --- /dev/null +++ b/libavutil/hwcontext_vulkan.h @@ -0,0 +1,150 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_HWCONTEXT_VULKAN_H +#define AVUTIL_HWCONTEXT_VULKAN_H + +#include + +/** + * @file + * API-specific header for AV_HWDEVICE_TYPE_VULKAN. + * + * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs + * with the data pointer set to an AVVkFrame. + */ + +/** + * Main Vulkan context, allocated as AVHWDeviceContext.hwctx. 
+ * All of these can be set before init to change what the context uses + */ +typedef struct AVVulkanDeviceContext { + /** + * Custom memory allocator, else NULL + */ + const VkAllocationCallbacks *alloc; + /** + * Instance + */ + VkInstance inst; + /** + * Physical device + */ + VkPhysicalDevice phys_dev; + /** + * Activated physical device + */ + VkDevice act_dev; + /** + * Queue family index for graphics + */ + int queue_family_index; + /** + * Queue family index for transfer ops only. By default, the priority order + * is dedicated transfer > dedicated compute > graphics. + */ + int queue_family_tx_index; + /** + * Queue family index for compute ops. Will be equal to the graphics + * one unless a dedicated transfer queue is found. + */ + int queue_family_comp_index; + /** + * The UUID of the selected physical device. + */ + uint8_t device_uuid[VK_UUID_SIZE]; +} AVVulkanDeviceContext; + +/** + * Allocated as AVHWFramesContext.hwctx, used to set pool-specific options + */ +typedef struct AVVulkanFramesContext { + /** + * Controls the tiling of output frames. + */ + VkImageTiling tiling; + /** + * Defines extra usage of output frames. This is bitwise OR'd with the + * standard usage flags (SAMPLED, STORAGE, TRANSFER_SRC and TRANSFER_DST). + */ + VkImageUsageFlagBits usage; + /** + * Extension data for image creation. By default, if the extension is + * available, this will be chained to a VkImageFormatListCreateInfoKHR. + */ + void *create_pnext; + /** + * Extension data for memory allocation. Must have as many entries as + * the number of planes of the sw_format. + * This will be chained to VkExportMemoryAllocateInfo, which is used + * to make all pool images exportable to other APIs. + */ + void *alloc_pnext[AV_NUM_DATA_POINTERS]; +} AVVulkanFramesContext; + +/* + * Frame structure, the VkFormat of the image will always match + * the pool's sw_format. 
+ * All frames, imported or allocated, will be created with the + * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed. + */ +typedef struct AVVkFrame { + /** + * Vulkan images to which the memory is bound to. + */ + VkImage img[AV_NUM_DATA_POINTERS]; + + /** + * Same tiling must be used for all images. + */ + VkImageTiling tiling; + + /** + * Memory backing the images. Could be less than the amount of images + * if importing from a DRM or VAAPI frame. + */ + VkDeviceMemory mem[AV_NUM_DATA_POINTERS]; + size_t size[AV_NUM_DATA_POINTERS]; + + /** + * OR'd flags for all memory allocated + */ + VkMemoryPropertyFlagBits flags; + + /** + * Updated after every barrier + */ + VkAccessFlagBits access[AV_NUM_DATA_POINTERS]; + VkImageLayout layout[AV_NUM_DATA_POINTERS]; + + /** + * Per-image semaphores. Must not be freed manually. Must be waited on + * and signalled at every queue submission. + */ + VkSemaphore sem[AV_NUM_DATA_POINTERS]; + + /** + * Internal data. + */ + struct AVVkFrameInternal *internal; +} AVVkFrame; + +/* Returns the format of each image up to the number of planes for a given sw_format. 
*/ +const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p); + +#endif /* AVUTIL_HWCONTEXT_VULKAN_H */ diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index 05dd4a1e20d8a..64178b7d563b9 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -2344,6 +2344,10 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { }, .flags = AV_PIX_FMT_FLAG_PLANAR, }, + [AV_PIX_FMT_VULKAN] = { + .name = "vulkan", + .flags = AV_PIX_FMT_FLAG_HWACCEL, + }, }; #if FF_API_PLUS1_MINUS1 FF_ENABLE_DEPRECATION_WARNINGS diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 37ecebd50122e..b2bb91defd4c6 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -348,6 +348,9 @@ enum AVPixelFormat { AV_PIX_FMT_NV24, ///< planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V) AV_PIX_FMT_NV42, ///< as above, but U and V bytes are swapped + /* Vulkan hardware images, data[0] contain an AVVkFrame */ + AV_PIX_FMT_VULKAN, + AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions }; From 85caee81a62977c2d2bd021047454905483c9bdb Mon Sep 17 00:00:00 2001 From: Philip Langdale Date: Wed, 23 Oct 2019 18:11:37 -0700 Subject: [PATCH 3/9] avfilter/vf_hwupload: Add support for HW -> HW transfers As we find ourselves wanting a way to transfer frames between HW devices (or more realistically, between APIs on the same device), it's desirable to have a way to describe the relationship. While we could imagine introducing a `hwtransfer` filter, there is almost no difference from `hwupload`. The main new feature we need is a way to specify the target device. Having a single device for the filter chain is obviously insufficient if we're dealing with two devices. So let's add a way to specify the upload target device, and if none is specified, continue with the existing behaviour. 
We must also correctly preserve the sw_format on such a transfer. --- doc/filters.texi | 13 ++++++++- libavfilter/vf_hwupload.c | 51 +++++++++++++++++++++++++--------- libavfilter/vf_hwupload_cuda.c | 10 ++++++- 3 files changed, 59 insertions(+), 15 deletions(-) diff --git a/doc/filters.texi b/doc/filters.texi index 57330d1fd909d..5037be29eb173 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -11893,7 +11893,18 @@ Upload system memory frames to hardware surfaces. The device to upload to must be supplied when the filter is initialised. If using ffmpeg, select the appropriate device with the @option{-filter_hw_device} -option. +option or with the @option{derive_device} option. The input and output devices +must be of different types and compatible - the exact meaning of this is +system-dependent, but typically it means that they must refer to the same +underlying hardware context (for example, refer to the same graphics card). + +The following additional parameters are accepted: + +@table @option +@item derive_device @var{type} +Rather than using the device supplied at initialisation, instead derive a new +device of type @var{type} from the device the input frames exist on. +@end table @anchor{hwupload_cuda} @section hwupload_cuda diff --git a/libavfilter/vf_hwupload.c b/libavfilter/vf_hwupload.c index 50bc7e10f6eb9..7c5dd497b0ea2 100644 --- a/libavfilter/vf_hwupload.c +++ b/libavfilter/vf_hwupload.c @@ -32,10 +32,11 @@ typedef struct HWUploadContext { const AVClass *class; AVBufferRef *hwdevice_ref; - AVHWDeviceContext *hwdevice; AVBufferRef *hwframes_ref; AVHWFramesContext *hwframes; + + char *device_type; } HWUploadContext; static int hwupload_query_formats(AVFilterContext *avctx) @@ -46,17 +47,27 @@ static int hwupload_query_formats(AVFilterContext *avctx) AVFilterFormats *input_formats = NULL; int err, i; - if (!avctx->hw_device_ctx) { + if (ctx->hwdevice_ref) { + /* We already have a specified device. 
*/ + } else if (avctx->hw_device_ctx) { + if (ctx->device_type) { + err = av_hwdevice_ctx_create_derived( + &ctx->hwdevice_ref, + av_hwdevice_find_type_by_name(ctx->device_type), + avctx->hw_device_ctx, 0); + if (err < 0) + return err; + } else { + ctx->hwdevice_ref = av_buffer_ref(avctx->hw_device_ctx); + if (!ctx->hwdevice_ref) + return AVERROR(ENOMEM); + } + } else { av_log(ctx, AV_LOG_ERROR, "A hardware device reference is required " "to upload frames to.\n"); return AVERROR(EINVAL); } - ctx->hwdevice_ref = av_buffer_ref(avctx->hw_device_ctx); - if (!ctx->hwdevice_ref) - return AVERROR(ENOMEM); - ctx->hwdevice = (AVHWDeviceContext*)ctx->hwdevice_ref->data; - constraints = av_hwdevice_get_hwframe_constraints(ctx->hwdevice_ref, NULL); if (!constraints) { err = AVERROR(EINVAL); @@ -127,7 +138,13 @@ static int hwupload_config_output(AVFilterLink *outlink) av_get_pix_fmt_name(inlink->format)); ctx->hwframes->format = outlink->format; - ctx->hwframes->sw_format = inlink->format; + if (inlink->hw_frames_ctx) { + AVHWFramesContext *in_hwframe_ctx = + (AVHWFramesContext*)inlink->hw_frames_ctx->data; + ctx->hwframes->sw_format = in_hwframe_ctx->sw_format; + } else { + ctx->hwframes->sw_format = inlink->format; + } ctx->hwframes->width = inlink->w; ctx->hwframes->height = inlink->h; @@ -200,13 +217,21 @@ static av_cold void hwupload_uninit(AVFilterContext *avctx) av_buffer_unref(&ctx->hwdevice_ref); } -static const AVClass hwupload_class = { - .class_name = "hwupload", - .item_name = av_default_item_name, - .option = NULL, - .version = LIBAVUTIL_VERSION_INT, +#define OFFSET(x) offsetof(HWUploadContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption hwupload_options[] = { + { + "derive_device", "Derive a new device of this type", + OFFSET(device_type), AV_OPT_TYPE_STRING, + { .str = NULL }, 0, 0, FLAGS + }, + { + NULL + } }; +AVFILTER_DEFINE_CLASS(hwupload); + static const AVFilterPad hwupload_inputs[] = { { .name = 
"default", diff --git a/libavfilter/vf_hwupload_cuda.c b/libavfilter/vf_hwupload_cuda.c index 4d83e6c8f27ed..8ee0825859c08 100644 --- a/libavfilter/vf_hwupload_cuda.c +++ b/libavfilter/vf_hwupload_cuda.c @@ -60,6 +60,9 @@ static int cudaupload_query_formats(AVFilterContext *ctx) AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_YUV444P16, AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, +#if CONFIG_VULKAN + AV_PIX_FMT_VULKAN, +#endif AV_PIX_FMT_NONE, }; static const enum AVPixelFormat output_pix_fmts[] = { @@ -97,7 +100,12 @@ static int cudaupload_config_output(AVFilterLink *outlink) hwframe_ctx = (AVHWFramesContext*)s->hwframe->data; hwframe_ctx->format = AV_PIX_FMT_CUDA; - hwframe_ctx->sw_format = inlink->format; + if (inlink->hw_frames_ctx) { + AVHWFramesContext *in_hwframe_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; + hwframe_ctx->sw_format = in_hwframe_ctx->sw_format; + } else { + hwframe_ctx->sw_format = inlink->format; + } hwframe_ctx->width = inlink->w; hwframe_ctx->height = inlink->h; From 6aab205a9bc89588241658d180b0da57dd9b5c88 Mon Sep 17 00:00:00 2001 From: Lynne Date: Sun, 27 Oct 2019 14:44:00 +0000 Subject: [PATCH 4/9] lavfi: add Vulkan filtering framework This commit adds a Vulkan filtering infrastructure for libavfilter. It attempts to abstract as much as possible of the Vulkan API from filters. 
--- configure | 3 + libavfilter/Makefile | 2 + libavfilter/glslang.cpp | 215 +++++++ libavfilter/glslang.h | 49 ++ libavfilter/vulkan.c | 1221 +++++++++++++++++++++++++++++++++++++++ libavfilter/vulkan.h | 323 +++++++++++ 6 files changed, 1813 insertions(+) create mode 100644 libavfilter/glslang.cpp create mode 100644 libavfilter/glslang.h create mode 100644 libavfilter/vulkan.c create mode 100644 libavfilter/vulkan.h diff --git a/configure b/configure index 6a42981ff1cf4..43a615c20ab4f 100755 --- a/configure +++ b/configure @@ -236,6 +236,7 @@ External library support: --enable-libfontconfig enable libfontconfig, useful for drawtext filter [no] --enable-libfreetype enable libfreetype, needed for drawtext filter [no] --enable-libfribidi enable libfribidi, improves drawtext filter [no] + --enable-libglslang enable GLSL->SPIRV compilation via libglslang [no] --enable-libgme enable Game Music Emu via libgme [no] --enable-libgsm enable GSM de/encoding via libgsm [no] --enable-libiec61883 enable iec61883 via libiec61883 [no] @@ -1771,6 +1772,7 @@ EXTERNAL_LIBRARY_LIST=" libfontconfig libfreetype libfribidi + libglslang libgme libgsm libiec61883 @@ -6259,6 +6261,7 @@ enabled fontconfig && enable libfontconfig enabled libfontconfig && require_pkg_config libfontconfig fontconfig "fontconfig/fontconfig.h" FcInit enabled libfreetype && require_pkg_config libfreetype freetype2 "ft2build.h FT_FREETYPE_H" FT_Init_FreeType enabled libfribidi && require_pkg_config libfribidi fribidi fribidi.h fribidi_version_info +enabled libglslang && require_cpp libglslang SPIRV/GlslangToSpv.h "glslang::TIntermediate*" -lglslang -lHLSL -lOGLCompiler -lOSDependent -lSPIRV -lSPVRemapper -lSPIRV -lstdc++ enabled libgme && { check_pkg_config libgme libgme gme/gme.h gme_new_emu || require libgme gme/gme.h gme_new_emu -lgme -lstdc++; } enabled libgsm && { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 37d4eee85848d..af541a9b8dd29 100644 --- 
a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -506,6 +506,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h +OBJS-$(CONFIG_LIBGLSLANG) += glslang.o + TOOLS = graph2dot TESTPROGS = drawutils filtfmts formats integral diff --git a/libavfilter/glslang.cpp b/libavfilter/glslang.cpp new file mode 100644 index 0000000000000..786136186d1b1 --- /dev/null +++ b/libavfilter/glslang.cpp @@ -0,0 +1,215 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include + +extern "C" { +#include "libavutil/mem.h" +} + +#include +#include +#include +#include + +#include "glslang.h" + +using namespace glslang; + +static pthread_mutex_t glslang_mutex = PTHREAD_MUTEX_INITIALIZER; +static int glslang_refcount; + +int glslang_init(void) +{ + int ret = 1; + + pthread_mutex_lock(&glslang_mutex); + if (glslang_refcount++ == 0) + ret = InitializeProcess(); + pthread_mutex_unlock(&glslang_mutex); + + return ret; +} + +void glslang_uninit(void) +{ + pthread_mutex_lock(&glslang_mutex); + if (--glslang_refcount == 0) + FinalizeProcess(); + if (glslang_refcount < 0) + glslang_refcount = 0; + pthread_mutex_unlock(&glslang_mutex); +} + +#define GLSL_VERSION EShTargetVulkan_1_0 +#define SPIRV_VERSION EShTargetSpv_1_0 + +extern const TBuiltInResource DefaultTBuiltInResource; + +GLSlangResult *glslang_compile(const char *glsl, enum GLSlangStage stage) +{ + GLSlangResult *res = (GLSlangResult *)av_mallocz(sizeof(*res)); + + static const EShLanguage lang[] = { + [GLSLANG_VERTEX] = EShLangVertex, + [GLSLANG_FRAGMENT] = EShLangFragment, + [GLSLANG_COMPUTE] = EShLangCompute, + }; + + assert(glslang_refcount); + TShader *shader = new TShader(lang[stage]); + shader->setEnvClient(EShClientVulkan, GLSL_VERSION); + shader->setEnvTarget(EShTargetSpv, SPIRV_VERSION); + shader->setStrings(&glsl, 1); + if (!shader->parse(&DefaultTBuiltInResource, GLSL_VERSION, true, EShMsgDefault)) { + res->error_msg = av_strdup(shader->getInfoLog()); + delete shader; + return res; + } + + TProgram *prog = new TProgram(); + prog->addShader(shader); + if (!prog->link(EShMsgDefault)) { + res->error_msg = av_strdup(prog->getInfoLog()); + delete shader; + delete prog; + return res; + } + + std::vector spirv; + 
GlslangToSpv(*prog->getIntermediate(lang[stage]), spirv); + + res->success = true; + res->size = spirv.size() * sizeof(unsigned int); + res->data = av_memdup(spirv.data(), res->size), + delete shader; + delete prog; + return res; +} + +// Taken from glslang's examples, which apparently generally bases the choices +// on OpenGL specification limits +const TBuiltInResource DefaultTBuiltInResource = { + /* .MaxLights = */ 32, + /* .MaxClipPlanes = */ 6, + /* .MaxTextureUnits = */ 32, + /* .MaxTextureCoords = */ 32, + /* .MaxVertexAttribs = */ 64, + /* .MaxVertexUniformComponents = */ 4096, + /* .MaxVaryingFloats = */ 64, + /* .MaxVertexTextureImageUnits = */ 32, + /* .MaxCombinedTextureImageUnits = */ 80, + /* .MaxTextureImageUnits = */ 32, + /* .MaxFragmentUniformComponents = */ 4096, + /* .MaxDrawBuffers = */ 32, + /* .MaxVertexUniformVectors = */ 128, + /* .MaxVaryingVectors = */ 8, + /* .MaxFragmentUniformVectors = */ 16, + /* .MaxVertexOutputVectors = */ 16, + /* .MaxFragmentInputVectors = */ 15, + /* .MinProgramTexelOffset = */ -8, + /* .MaxProgramTexelOffset = */ 7, + /* .MaxClipDistances = */ 8, + /* .MaxComputeWorkGroupCountX = */ 65535, + /* .MaxComputeWorkGroupCountY = */ 65535, + /* .MaxComputeWorkGroupCountZ = */ 65535, + /* .MaxComputeWorkGroupSizeX = */ 1024, + /* .MaxComputeWorkGroupSizeY = */ 1024, + /* .MaxComputeWorkGroupSizeZ = */ 64, + /* .MaxComputeUniformComponents = */ 1024, + /* .MaxComputeTextureImageUnits = */ 16, + /* .MaxComputeImageUniforms = */ 8, + /* .MaxComputeAtomicCounters = */ 8, + /* .MaxComputeAtomicCounterBuffers = */ 1, + /* .MaxVaryingComponents = */ 60, + /* .MaxVertexOutputComponents = */ 64, + /* .MaxGeometryInputComponents = */ 64, + /* .MaxGeometryOutputComponents = */ 128, + /* .MaxFragmentInputComponents = */ 128, + /* .MaxImageUnits = */ 8, + /* .MaxCombinedImageUnitsAndFragmentOutputs = */ 8, + /* .MaxCombinedShaderOutputResources = */ 8, + /* .MaxImageSamples = */ 0, + /* .MaxVertexImageUniforms = */ 0, + /* 
.MaxTessControlImageUniforms = */ 0, + /* .MaxTessEvaluationImageUniforms = */ 0, + /* .MaxGeometryImageUniforms = */ 0, + /* .MaxFragmentImageUniforms = */ 8, + /* .MaxCombinedImageUniforms = */ 8, + /* .MaxGeometryTextureImageUnits = */ 16, + /* .MaxGeometryOutputVertices = */ 256, + /* .MaxGeometryTotalOutputComponents = */ 1024, + /* .MaxGeometryUniformComponents = */ 1024, + /* .MaxGeometryVaryingComponents = */ 64, + /* .MaxTessControlInputComponents = */ 128, + /* .MaxTessControlOutputComponents = */ 128, + /* .MaxTessControlTextureImageUnits = */ 16, + /* .MaxTessControlUniformComponents = */ 1024, + /* .MaxTessControlTotalOutputComponents = */ 4096, + /* .MaxTessEvaluationInputComponents = */ 128, + /* .MaxTessEvaluationOutputComponents = */ 128, + /* .MaxTessEvaluationTextureImageUnits = */ 16, + /* .MaxTessEvaluationUniformComponents = */ 1024, + /* .MaxTessPatchComponents = */ 120, + /* .MaxPatchVertices = */ 32, + /* .MaxTessGenLevel = */ 64, + /* .MaxViewports = */ 16, + /* .MaxVertexAtomicCounters = */ 0, + /* .MaxTessControlAtomicCounters = */ 0, + /* .MaxTessEvaluationAtomicCounters = */ 0, + /* .MaxGeometryAtomicCounters = */ 0, + /* .MaxFragmentAtomicCounters = */ 8, + /* .MaxCombinedAtomicCounters = */ 8, + /* .MaxAtomicCounterBindings = */ 1, + /* .MaxVertexAtomicCounterBuffers = */ 0, + /* .MaxTessControlAtomicCounterBuffers = */ 0, + /* .MaxTessEvaluationAtomicCounterBuffers = */ 0, + /* .MaxGeometryAtomicCounterBuffers = */ 0, + /* .MaxFragmentAtomicCounterBuffers = */ 1, + /* .MaxCombinedAtomicCounterBuffers = */ 1, + /* .MaxAtomicCounterBufferSize = */ 16384, + /* .MaxTransformFeedbackBuffers = */ 4, + /* .MaxTransformFeedbackInterleavedComponents = */ 64, + /* .MaxCullDistances = */ 8, + /* .MaxCombinedClipAndCullDistances = */ 8, + /* .MaxSamples = */ 4, +#if GLSLANG_PATCH_LEVEL >= 2892 + /* .maxMeshOutputVerticesNV = */ 256, + /* .maxMeshOutputPrimitivesNV = */ 512, + /* .maxMeshWorkGroupSizeX_NV = */ 32, + /* .maxMeshWorkGroupSizeY_NV 
= */ 1, + /* .maxMeshWorkGroupSizeZ_NV = */ 1, + /* .maxTaskWorkGroupSizeX_NV = */ 32, + /* .maxTaskWorkGroupSizeY_NV = */ 1, + /* .maxTaskWorkGroupSizeZ_NV = */ 1, + /* .maxMeshViewCountNV = */ 4, +#endif + + .limits = { + /* .nonInductiveForLoops = */ 1, + /* .whileLoops = */ 1, + /* .doWhileLoops = */ 1, + /* .generalUniformIndexing = */ 1, + /* .generalAttributeMatrixVectorIndexing = */ 1, + /* .generalVaryingIndexing = */ 1, + /* .generalSamplerIndexing = */ 1, + /* .generalVariableIndexing = */ 1, + /* .generalConstantMatrixVectorIndexing = */ 1, + } +}; diff --git a/libavfilter/glslang.h b/libavfilter/glslang.h new file mode 100644 index 0000000000000..865af71580d0a --- /dev/null +++ b/libavfilter/glslang.h @@ -0,0 +1,49 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int glslang_init(void); +void glslang_uninit(void); + +typedef struct GLSlangResult { + int success; + const char *error_msg; + + void *data; /* Shader data or NULL */ + size_t size; +} GLSlangResult; + +enum GLSlangStage { + GLSLANG_VERTEX, + GLSLANG_FRAGMENT, + GLSLANG_COMPUTE, +}; + +/* Compile GLSL into a SPIRV stream, if possible */ +GLSlangResult *glslang_compile(const char *glsl, enum GLSlangStage stage); + +#ifdef __cplusplus +} +#endif diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c new file mode 100644 index 0000000000000..99aaeb2ef42d1 --- /dev/null +++ b/libavfilter/vulkan.c @@ -0,0 +1,1221 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "formats.h" +#include "vulkan.h" +#include "glslang.h" + +/* Generic macro for creating contexts which need to keep their addresses + * if another context is created. 
*/ +#define FN_CREATING(ctx, type, shortname, array, num) \ +static av_always_inline type *create_ ##shortname(ctx *dctx) \ +{ \ + type **array, *sctx = av_mallocz(sizeof(*sctx)); \ + if (!sctx) \ + return NULL; \ + \ + array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\ + if (!array) { \ + av_free(sctx); \ + return NULL; \ + } \ + \ + dctx->array = array; \ + dctx->array[dctx->num++] = sctx; \ + \ + return sctx; \ +} + +const VkComponentMapping ff_comp_identity_map = { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, +}; + +/* Converts return values to strings */ +const char *ff_vk_ret2str(VkResult res) +{ +#define CASE(VAL) case VAL: return #VAL + switch (res) { + CASE(VK_SUCCESS); + CASE(VK_NOT_READY); + CASE(VK_TIMEOUT); + CASE(VK_EVENT_SET); + CASE(VK_EVENT_RESET); + CASE(VK_INCOMPLETE); + CASE(VK_ERROR_OUT_OF_HOST_MEMORY); + CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); + CASE(VK_ERROR_INITIALIZATION_FAILED); + CASE(VK_ERROR_DEVICE_LOST); + CASE(VK_ERROR_MEMORY_MAP_FAILED); + CASE(VK_ERROR_LAYER_NOT_PRESENT); + CASE(VK_ERROR_EXTENSION_NOT_PRESENT); + CASE(VK_ERROR_FEATURE_NOT_PRESENT); + CASE(VK_ERROR_INCOMPATIBLE_DRIVER); + CASE(VK_ERROR_TOO_MANY_OBJECTS); + CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); + CASE(VK_ERROR_FRAGMENTED_POOL); + CASE(VK_ERROR_SURFACE_LOST_KHR); + CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); + CASE(VK_SUBOPTIMAL_KHR); + CASE(VK_ERROR_OUT_OF_DATE_KHR); + CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); + CASE(VK_ERROR_VALIDATION_FAILED_EXT); + CASE(VK_ERROR_INVALID_SHADER_NV); + CASE(VK_ERROR_OUT_OF_POOL_MEMORY); + CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); + CASE(VK_ERROR_NOT_PERMITTED_EXT); + default: return "Unknown error"; + } +#undef CASE +} + +static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory 
*mem) +{ + VkResult ret; + int index = -1; + VkPhysicalDeviceProperties props; + VkPhysicalDeviceMemoryProperties mprops; + VulkanFilterContext *s = avctx->priv; + + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = alloc_extension, + }; + + vkGetPhysicalDeviceProperties(s->hwctx->phys_dev, &props); + vkGetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops); + + /* Align if we need to */ + if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment); + + alloc_info.allocationSize = req->size; + + /* The vulkan spec requires memory types to be sorted in the "optimal" + * order, so the first matching type we find will be the best/fastest one */ + for (int i = 0; i < mprops.memoryTypeCount; i++) { + /* The memory type must be supported by the requirements (bitfield) */ + if (!(req->memoryTypeBits & (1 << i))) + continue; + + /* The memory type flags must include our properties */ + if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) + continue; + + /* Found a suitable memory type */ + index = i; + break; + } + + if (index < 0) { + av_log(avctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", + req_flags); + return AVERROR(EINVAL); + } + + alloc_info.memoryTypeIndex = index; + + ret = vkAllocateMemory(s->hwctx->act_dev, &alloc_info, + s->hwctx->alloc, mem); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR(ENOMEM); + } + + *mem_flags |= mprops.memoryTypes[index].propertyFlags; + + return 0; +} + +int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) +{ + int err; + VkResult ret; + VkMemoryRequirements req; + VulkanFilterContext *s = avctx->priv; + + VkBufferCreateInfo buf_spawn = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = NULL, + .usage = usage, + 
.sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .size = size, /* Gets FFALIGNED during alloc if host visible + but should be ok */ + }; + + ret = vkCreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + vkGetBufferMemoryRequirements(s->hwctx->act_dev, buf->buf, &req); + + err = vk_alloc_mem(avctx, &req, flags, NULL, &buf->flags, &buf->mem); + if (err) + return err; + + ret = vkBindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[], + int nb_buffers, int invalidate) +{ + VkResult ret; + VulkanFilterContext *s = avctx->priv; + VkMappedMemoryRange *inval_list = NULL; + int inval_count = 0; + + for (int i = 0; i < nb_buffers; i++) { + ret = vkMapMemory(s->hwctx->act_dev, buf[i].mem, 0, + VK_WHOLE_SIZE, 0, (void **)&mem[i]); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + if (!invalidate) + return 0; + + for (int i = 0; i < nb_buffers; i++) { + const VkMappedMemoryRange ival_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = buf[i].mem, + .size = VK_WHOLE_SIZE, + }; + if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + continue; + inval_list = av_fast_realloc(s->scratch, &s->scratch_size, + (++inval_count)*sizeof(*inval_list)); + if (!inval_list) + return AVERROR(ENOMEM); + inval_list[inval_count - 1] = ival_buf; + } + + if (inval_count) { + ret = vkInvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count, + inval_list); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", + ff_vk_ret2str(ret)); + 
return AVERROR_EXTERNAL; + } + } + + return 0; +} + +int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers, + int flush) +{ + int err = 0; + VkResult ret; + VulkanFilterContext *s = avctx->priv; + VkMappedMemoryRange *flush_list = NULL; + int flush_count = 0; + + if (flush) { + for (int i = 0; i < nb_buffers; i++) { + const VkMappedMemoryRange flush_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = buf[i].mem, + .size = VK_WHOLE_SIZE, + }; + if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + continue; + flush_list = av_fast_realloc(s->scratch, &s->scratch_size, + (++flush_count)*sizeof(*flush_list)); + if (!flush_list) + return AVERROR(ENOMEM); + flush_list[flush_count - 1] = flush_buf; + } + } + + if (flush_count) { + ret = vkFlushMappedMemoryRanges(s->hwctx->act_dev, flush_count, + flush_list); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ + } + } + + for (int i = 0; i < nb_buffers; i++) + vkUnmapMemory(s->hwctx->act_dev, buf[i].mem); + + return err; +} + +void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf) +{ + VulkanFilterContext *s = avctx->priv; + if (!buf) + return; + + if (buf->buf != VK_NULL_HANDLE) + vkDestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); + if (buf->mem != VK_NULL_HANDLE) + vkFreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); +} + +int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl, + int offset, int size, VkShaderStageFlagBits stage) +{ + VkPushConstantRange *pc; + + pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), + pl->push_consts_num + 1); + if (!pl->push_consts) + return AVERROR(ENOMEM); + + pc = &pl->push_consts[pl->push_consts_num++]; + memset(pc, 0, sizeof(*pc)); + + pc->stageFlags = stage; + pc->offset = offset; + pc->size = size; + + return 0; +} + 
+FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num) +int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue) +{ + VkResult ret; + FFVkExecContext *e; + VulkanFilterContext *s = avctx->priv; + + VkCommandPoolCreateInfo cqueue_create = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = queue, + }; + VkCommandBufferAllocateInfo cbuf_create = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }; + VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO }; + + e = create_exec_ctx(s); + if (!e) + return AVERROR(ENOMEM); + + ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create, + s->hwctx->alloc, &e->pool); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n", + ff_vk_ret2str(ret)); + return 1; + } + + cbuf_create.commandPool = e->pool; + + ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, &e->buf); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", + ff_vk_ret2str(ret)); + return 1; + } + + ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn, + s->hwctx->alloc, &e->fence); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n", + ff_vk_ret2str(ret)); + return 1; + } + + vkGetDeviceQueue(s->hwctx->act_dev, queue, 0, &e->queue); + + *ctx = e; + + return 0; +} + +int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e) +{ + VkResult ret; + VkCommandBufferBeginInfo cmd_start = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + + e->sem_wait_cnt = 0; + e->sem_sig_cnt = 0; + + ret = vkBeginCommandBuffer(e->buf, &cmd_start); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to start command 
recording: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e, + AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag) +{ + AVVkFrame *f = (AVVkFrame *)frame->data[0]; + AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; + int planes = av_pix_fmt_count_planes(fc->sw_format); + + for (int i = 0; i < planes; i++) { + e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, + (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); + if (!e->sem_wait) + return AVERROR(ENOMEM); + + e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, + (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst)); + if (!e->sem_wait_dst) + return AVERROR(ENOMEM); + + e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, + (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig)); + if (!e->sem_sig) + return AVERROR(ENOMEM); + + e->sem_wait[e->sem_wait_cnt] = f->sem[i]; + e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; + e->sem_wait_cnt++; + + e->sem_sig[e->sem_sig_cnt] = f->sem[i]; + e->sem_sig_cnt++; + } + + return 0; +} + +int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e) +{ + VkResult ret; + VulkanFilterContext *s = avctx->priv; + + VkSubmitInfo s_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &e->buf, + + .pWaitSemaphores = e->sem_wait, + .pWaitDstStageMask = e->sem_wait_dst, + .waitSemaphoreCount = e->sem_wait_cnt, + + .pSignalSemaphores = e->sem_sig, + .signalSemaphoreCount = e->sem_sig_cnt, + }; + + vkEndCommandBuffer(e->buf); + + ret = vkQueueSubmit(e->queue, 1, &s_info, e->fence); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + vkWaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vkResetFences(s->hwctx->act_dev, 1, &e->fence); + + return 0; +} + +int 
ff_vk_filter_query_formats(AVFilterContext *avctx) +{ + static const enum AVPixelFormat pixel_formats[] = { + AV_PIX_FMT_VULKAN, AV_PIX_FMT_NONE, + }; + AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); + if (!pix_fmts) + return AVERROR(ENOMEM); + + return ff_set_common_formats(avctx, pix_fmts); +} + +static int vulkan_filter_set_device(AVFilterContext *avctx, + AVBufferRef *device) +{ + VulkanFilterContext *s = avctx->priv; + + av_buffer_unref(&s->device_ref); + + s->device_ref = av_buffer_ref(device); + if (!s->device_ref) + return AVERROR(ENOMEM); + + s->device = (AVHWDeviceContext*)s->device_ref->data; + s->hwctx = s->device->hwctx; + + return 0; +} + +static int vulkan_filter_set_frames(AVFilterContext *avctx, + AVBufferRef *frames) +{ + VulkanFilterContext *s = avctx->priv; + + av_buffer_unref(&s->frames_ref); + + s->frames_ref = av_buffer_ref(frames); + if (!s->frames_ref) + return AVERROR(ENOMEM); + + return 0; +} + +int ff_vk_filter_config_input(AVFilterLink *inlink) +{ + int err; + AVFilterContext *avctx = inlink->dst; + VulkanFilterContext *s = avctx->priv; + AVHWFramesContext *input_frames; + + if (!inlink->hw_frames_ctx) { + av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " + "hardware frames context on the input.\n"); + return AVERROR(EINVAL); + } + + /* Extract the device and default output format from the first input. */ + if (avctx->inputs[0] != inlink) + return 0; + + input_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data; + if (input_frames->format != AV_PIX_FMT_VULKAN) + return AVERROR(EINVAL); + + err = vulkan_filter_set_device(avctx, input_frames->device_ref); + if (err < 0) + return err; + err = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx); + if (err < 0) + return err; + + /* Default output parameters match input parameters. 
*/ + s->input_format = input_frames->sw_format; + if (s->output_format == AV_PIX_FMT_NONE) + s->output_format = input_frames->sw_format; + if (!s->output_width) + s->output_width = inlink->w; + if (!s->output_height) + s->output_height = inlink->h; + + return 0; +} + +int ff_vk_filter_config_output_inplace(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + VulkanFilterContext *s = avctx->priv; + + av_buffer_unref(&outlink->hw_frames_ctx); + + if (!s->device_ref) { + if (!avctx->hw_device_ctx) { + av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " + "Vulkan device.\n"); + return AVERROR(EINVAL); + } + + err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); + if (err < 0) + return err; + } + + outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref); + outlink->w = s->output_width; + outlink->h = s->output_height; + + return 0; +} + +int ff_vk_filter_config_output(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + VulkanFilterContext *s = avctx->priv; + AVBufferRef *output_frames_ref; + AVHWFramesContext *output_frames; + + av_buffer_unref(&outlink->hw_frames_ctx); + + if (!s->device_ref) { + if (!avctx->hw_device_ctx) { + av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " + "Vulkan device.\n"); + return AVERROR(EINVAL); + } + + err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); + if (err < 0) + return err; + } + + output_frames_ref = av_hwframe_ctx_alloc(s->device_ref); + if (!output_frames_ref) { + err = AVERROR(ENOMEM); + goto fail; + } + output_frames = (AVHWFramesContext*)output_frames_ref->data; + + output_frames->format = AV_PIX_FMT_VULKAN; + output_frames->sw_format = s->output_format; + output_frames->width = s->output_width; + output_frames->height = s->output_height; + + err = av_hwframe_ctx_init(output_frames_ref); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to initialise output " + "frames: %d.\n", err); + goto fail; + } + + outlink->hw_frames_ctx = 
output_frames_ref; + outlink->w = s->output_width; + outlink->h = s->output_height; + + return 0; +fail: + av_buffer_unref(&output_frames_ref); + return err; +} + +int ff_vk_filter_init(AVFilterContext *avctx) +{ + VulkanFilterContext *s = avctx->priv; + + s->output_format = AV_PIX_FMT_NONE; + + if (!glslang_init()) + return AVERROR_EXTERNAL; + + return 0; +} + +FN_CREATING(VulkanFilterContext, VkSampler, sampler, samplers, samplers_num) +VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, + VkFilter filt) +{ + VkResult ret; + VulkanFilterContext *s = avctx->priv; + + VkSamplerCreateInfo sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = filt, + .minFilter = sampler_info.magFilter, + .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST : + VK_SAMPLER_MIPMAP_MODE_LINEAR, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeV = sampler_info.addressModeU, + .addressModeW = sampler_info.addressModeU, + .anisotropyEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, + .unnormalizedCoordinates = unnorm_coords, + }; + + VkSampler *sampler = create_sampler(s); + if (!sampler) + return NULL; + + ret = vkCreateSampler(s->hwctx->act_dev, &sampler_info, + s->hwctx->alloc, sampler); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n", + ff_vk_ret2str(ret)); + return NULL; + } + + return sampler; +} + +const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt); + const int high = desc->comp[0].depth > 8; + return high ? 
"rgba16f" : "rgba8"; +} + +int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img, + VkFormat fmt, const VkComponentMapping map) +{ + VulkanFilterContext *s = avctx->priv; + VkImageViewCreateInfo imgview_spawn = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = NULL, + .image = img, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = fmt, + .components = map, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + + VkResult ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn, + s->hwctx->alloc, v); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v) +{ + VulkanFilterContext *s = avctx->priv; + if (v && *v) { + vkDestroyImageView(s->hwctx->act_dev, *v, s->hwctx->alloc); + *v = NULL; + } +} + +FN_CREATING(VulkanPipeline, SPIRVShader, shader, shaders, shaders_num) +SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl, + const char *name, VkShaderStageFlags stage) +{ + SPIRVShader *shd = create_shader(pl); + if (!shd) + return NULL; + + av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); + + shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shd->shader.stage = stage; + + shd->name = name; + + GLSLF(0, #version %i ,460); + GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) ); + GLSLC(0, ); + + return shd; +} + +void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd, + int local_size[3]) +{ + shd->local_size[0] = local_size[0]; + shd->local_size[1] = local_size[1]; + shd->local_size[2] = local_size[2]; + + av_bprintf(&shd->src, "layout (local_size_x = %i, " + "local_size_y = %i, local_size_z = %i) in;\n\n", + shd->local_size[0], shd->local_size[1], 
shd->local_size[2]); +} + +static void print_shader(AVFilterContext *avctx, SPIRVShader *shd) +{ + int line = 0; + const char *p = shd->src.str; + const char *start = p; + + AVBPrint buf; + av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); + + for (int i = 0; i < strlen(p); i++) { + if (p[i] == '\n') { + av_bprintf(&buf, "%i\t", ++line); + av_bprint_append_data(&buf, start, &p[i] - start + 1); + start = &p[i + 1]; + } + } + + av_log(avctx, AV_LOG_VERBOSE, "Compiling shader %s: \n%s", + shd->name, buf.str); + av_bprint_finalize(&buf, NULL); +} + +int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd, + const char *entrypoint) +{ + VkResult ret; + VulkanFilterContext *s = avctx->priv; + VkShaderModuleCreateInfo shader_create; + GLSlangResult *res; + + static const enum GLSlangStage emap[] = { + [VK_SHADER_STAGE_VERTEX_BIT] = GLSLANG_VERTEX, + [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_FRAGMENT, + [VK_SHADER_STAGE_COMPUTE_BIT] = GLSLANG_COMPUTE, + }; + + shd->shader.pName = entrypoint; + + print_shader(avctx, shd); + + res = glslang_compile(shd->src.str, emap[shd->shader.stage]); + + av_bprint_finalize(&shd->src, NULL); + + if (!res->success) { + av_log(avctx, AV_LOG_ERROR, "%s", res->error_msg); + return AVERROR_EXTERNAL; + } + + shader_create.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + shader_create.pNext = NULL; + shader_create.codeSize = res->size; + shader_create.flags = 0; + shader_create.pCode = res->data; + + ret = vkCreateShaderModule(s->hwctx->act_dev, &shader_create, NULL, + &shd->shader.module); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + av_log(avctx, AV_LOG_VERBOSE, "Shader linked! 
Size: %zu bytes\n", + shader_create.codeSize); + + return 0; +} + +static const struct descriptor_props { + size_t struct_size; /* Size of the opaque which updates the descriptor */ + const char *type; + int is_uniform; + int mem_quali; /* Can use a memory qualifier */ + int dim_needed; /* Must indicate dimension */ + int buf_content; /* Must indicate buffer contents */ +} descriptor_props[] = { + [VK_DESCRIPTOR_TYPE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 0, 0, }, + [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = { sizeof(VkDescriptorImageInfo), "texture", 1, 0, 1, 0, }, + [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = { sizeof(VkDescriptorImageInfo), "image", 1, 1, 1, 0, }, + [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = { sizeof(VkDescriptorImageInfo), "subpassInput", 1, 0, 0, 0, }, + [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 1, 0, }, + [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, + [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, }, + [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, + [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, }, + [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = { sizeof(VkBufferView), "samplerBuffer", 1, 0, 0, 0, }, + [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, +}; + +int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, + SPIRVShader *shd, VulkanDescriptorSetBinding *desc, + int num, int only_print_to_shader) +{ + VkResult ret; + VkDescriptorSetLayout *layout; + VulkanFilterContext *s = avctx->priv; + + if (only_print_to_shader) + goto print; + + pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout), + pl->descriptor_sets_num + 1); + if (!pl->desc_layout) + return AVERROR(ENOMEM); + + layout = 
&pl->desc_layout[pl->descriptor_sets_num]; + memset(layout, 0, sizeof(*layout)); + + { /* Create descriptor set layout descriptions */ + VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 }; + VkDescriptorSetLayoutBinding *desc_binding; + + desc_binding = av_mallocz(sizeof(*desc_binding)*num); + if (!desc_binding) + return AVERROR(ENOMEM); + + for (int i = 0; i < num; i++) { + desc_binding[i].binding = i; + desc_binding[i].descriptorType = desc[i].type; + desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1); + desc_binding[i].stageFlags = desc[i].stages; + desc_binding[i].pImmutableSamplers = desc[i].samplers; + } + + desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + desc_create_layout.pBindings = desc_binding; + desc_create_layout.bindingCount = num; + + ret = vkCreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, + s->hwctx->alloc, layout); + av_free(desc_binding); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set " + "layout: %s\n", ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + { /* Pool each descriptor by type and update pool counts */ + for (int i = 0; i < num; i++) { + int j; + for (j = 0; j < pl->pool_size_desc_num; j++) + if (pl->pool_size_desc[j].type == desc[i].type) + break; + if (j >= pl->pool_size_desc_num) { + pl->pool_size_desc = av_realloc_array(pl->pool_size_desc, + sizeof(*pl->pool_size_desc), + ++pl->pool_size_desc_num); + if (!pl->pool_size_desc) + return AVERROR(ENOMEM); + memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize)); + } + pl->pool_size_desc[j].type = desc[i].type; + pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1); + } + } + + { /* Create template creation struct */ + VkDescriptorUpdateTemplateCreateInfo *dt; + VkDescriptorUpdateTemplateEntry *des_entries; + + /* Freed after descriptor set initialization */ + des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry)); + if (!des_entries) + 
return AVERROR(ENOMEM); + + for (int i = 0; i < num; i++) { + des_entries[i].dstBinding = i; + des_entries[i].descriptorType = desc[i].type; + des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1); + des_entries[i].dstArrayElement = 0; + des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s; + des_entries[i].stride = descriptor_props[desc[i].type].struct_size; + } + + pl->desc_template_info = av_realloc_array(pl->desc_template_info, + sizeof(*pl->desc_template_info), + pl->descriptor_sets_num + 1); + if (!pl->desc_template_info) + return AVERROR(ENOMEM); + + dt = &pl->desc_template_info[pl->descriptor_sets_num]; + memset(dt, 0, sizeof(*dt)); + + dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; + dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET; + dt->descriptorSetLayout = *layout; + dt->pDescriptorUpdateEntries = des_entries; + dt->descriptorUpdateEntryCount = num; + } + + pl->descriptor_sets_num++; + +print: + /* Write shader info */ + for (int i = 0; i < num; i++) { + const struct descriptor_props *prop = &descriptor_props[desc[i].type]; + GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i); + + if (desc[i].mem_layout) + GLSLA(", %s", desc[i].mem_layout); + GLSLA(")"); + + if (prop->is_uniform) + GLSLA(" uniform"); + + if (prop->mem_quali && desc[i].mem_quali) + GLSLA(" %s", desc[i].mem_quali); + + if (prop->type) + GLSLA(" %s", prop->type); + + if (prop->dim_needed) + GLSLA("%iD", desc[i].dimensions); + + GLSLA(" %s", desc[i].name); + + if (prop->buf_content) + GLSLA(" {\n %s\n}", desc[i].buf_content); + else if (desc[i].elems > 0) + GLSLA("[%i]", desc[i].elems); + + GLSLA(";\n"); + } + GLSLA("\n"); + + return 0; +} + +void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, + int set_id) +{ + VulkanFilterContext *s = avctx->priv; + + vkUpdateDescriptorSetWithTemplate(s->hwctx->act_dev, + pl->desc_set[set_id], + pl->desc_template[set_id], s); +} + +void 
ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e, + VkShaderStageFlagBits stage, int offset, + size_t size, void *src) +{ + vkCmdPushConstants(e->buf, e->bound_pl->pipeline_layout, + stage, offset, size, src); +} + +int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl) +{ + VkResult ret; + VulkanFilterContext *s = avctx->priv; + + { /* Init descriptor set pool */ + VkDescriptorPoolCreateInfo pool_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .poolSizeCount = pl->pool_size_desc_num, + .pPoolSizes = pl->pool_size_desc, + .maxSets = pl->descriptor_sets_num, + }; + + ret = vkCreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, + s->hwctx->alloc, &pl->desc_pool); + av_freep(&pl->pool_size_desc); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set " + "pool: %s\n", ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + { /* Allocate descriptor sets */ + VkDescriptorSetAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = pl->desc_pool, + .descriptorSetCount = pl->descriptor_sets_num, + .pSetLayouts = pl->desc_layout, + }; + + pl->desc_set = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_set)); + if (!pl->desc_set) + return AVERROR(ENOMEM); + + ret = vkAllocateDescriptorSets(s->hwctx->act_dev, &alloc_info, + pl->desc_set); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + { /* Finally create the pipeline layout */ + VkPipelineLayoutCreateInfo spawn_pipeline_layout = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = pl->descriptor_sets_num, + .pSetLayouts = pl->desc_layout, + .pushConstantRangeCount = pl->push_consts_num, + .pPushConstantRanges = pl->push_consts, + }; + + ret = vkCreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout, + s->hwctx->alloc, 
&pl->pipeline_layout); + av_freep(&pl->push_consts); + pl->push_consts_num = 0; + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + { /* Descriptor template (for tightly packed descriptors) */ + VkDescriptorUpdateTemplateCreateInfo *desc_template_info; + + pl->desc_template = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_template)); + if (!pl->desc_template) + return AVERROR(ENOMEM); + + /* Create update templates for the descriptor sets */ + for (int i = 0; i < pl->descriptor_sets_num; i++) { + desc_template_info = &pl->desc_template_info[i]; + desc_template_info->pipelineLayout = pl->pipeline_layout; + ret = vkCreateDescriptorUpdateTemplate(s->hwctx->act_dev, + desc_template_info, + s->hwctx->alloc, + &pl->desc_template[i]); + av_free((void *)desc_template_info->pDescriptorUpdateEntries); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor " + "template: %s\n", ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + av_freep(&pl->desc_template_info); + } + + return 0; +} + +FN_CREATING(VulkanFilterContext, VulkanPipeline, pipeline, pipelines, pipelines_num) +VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx) +{ + return create_pipeline(avctx->priv); +} + +int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl) +{ + int i; + VkResult ret; + VulkanFilterContext *s = avctx->priv; + + VkComputePipelineCreateInfo pipe = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .layout = pl->pipeline_layout, + }; + + for (i = 0; i < pl->shaders_num; i++) { + if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) { + pipe.stage = pl->shaders[i]->shader; + break; + } + } + if (i == pl->shaders_num) { + av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n"); + return AVERROR(EINVAL); + } + + ret = vkCreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe, 
+ s->hwctx->alloc, &pl->pipeline); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + + return 0; +} + +void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e, + VulkanPipeline *pl) +{ + vkCmdBindPipeline(e->buf, pl->bind_point, pl->pipeline); + + vkCmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout, 0, + pl->descriptor_sets_num, pl->desc_set, 0, 0); + + e->bound_pl = pl; +} + +static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e) +{ + vkDestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc); + + if (e->buf != VK_NULL_HANDLE) + vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, 1, &e->buf); + if (e->pool != VK_NULL_HANDLE) + vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); + + av_free(e->sem_wait); + av_free(e->sem_wait_dst); + av_free(e->sem_sig); + + av_free(e); +} + +static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl) +{ + for (int i = 0; i < pl->shaders_num; i++) { + SPIRVShader *shd = pl->shaders[i]; + vkDestroyShaderModule(s->hwctx->act_dev, shd->shader.module, + s->hwctx->alloc); + av_free(shd); + } + + vkDestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); + vkDestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, + s->hwctx->alloc); + + for (int i = 0; i < pl->descriptor_sets_num; i++) { + vkDestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i], + s->hwctx->alloc); + vkDestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i], + s->hwctx->alloc); + } + + /* Also frees the descriptor sets */ + vkDestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool, + s->hwctx->alloc); + + av_freep(&pl->desc_set); + av_freep(&pl->shaders); + av_freep(&pl->desc_layout); + av_freep(&pl->desc_template); + av_freep(&pl->push_consts); + pl->push_consts_num = 0; + + /* Only freed in case of 
failure */ + av_freep(&pl->pool_size_desc); + if (pl->desc_template_info) { + for (int i = 0; i < pl->descriptor_sets_num; i++) + av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries); + av_freep(&pl->desc_template_info); + } + + av_free(pl); +} + +void ff_vk_filter_uninit(AVFilterContext *avctx) +{ + VulkanFilterContext *s = avctx->priv; + + glslang_uninit(); + + for (int i = 0; i < s->samplers_num; i++) + vkDestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc); + av_freep(&s->samplers); + + for (int i = 0; i < s->pipelines_num; i++) + free_pipeline(s, s->pipelines[i]); + av_freep(&s->pipelines); + + for (int i = 0; i < s->exec_ctx_num; i++) + free_exec_ctx(s, s->exec_ctx[i]); + av_freep(&s->exec_ctx); + + av_freep(&s->scratch); + s->scratch_size = 0; + + av_buffer_unref(&s->device_ref); + av_buffer_unref(&s->frames_ref); +} diff --git a/libavfilter/vulkan.h b/libavfilter/vulkan.h new file mode 100644 index 0000000000000..8d4def1a004ab --- /dev/null +++ b/libavfilter/vulkan.h @@ -0,0 +1,323 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_VULKAN_COMMON_H +#define AVFILTER_VULKAN_COMMON_H + +#include "avfilter.h" +#include "libavutil/pixdesc.h" +#include "libavutil/bprint.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_vulkan.h" + +/* GLSL management macros */ +#define INDENT(N) INDENT_##N +#define INDENT_0 +#define INDENT_1 INDENT_0 " " +#define INDENT_2 INDENT_1 INDENT_1 +#define INDENT_3 INDENT_2 INDENT_1 +#define INDENT_4 INDENT_3 INDENT_1 +#define INDENT_5 INDENT_4 INDENT_1 +#define INDENT_6 INDENT_5 INDENT_1 +#define C(N, S) INDENT(N) #S "\n" +#define GLSLC(N, S) av_bprintf(&shd->src, C(N, S)) +#define GLSLA(...) av_bprintf(&shd->src, __VA_ARGS__) +#define GLSLF(N, S, ...) av_bprintf(&shd->src, C(N, S), __VA_ARGS__) +#define GLSLD(D) GLSLC(0, ); \ + av_bprint_append_data(&shd->src, D, strlen(D)); \ + GLSLC(0, ) + +/* Helper, pretty much every Vulkan return value needs to be checked */ +#define RET(x) \ + do { \ + if ((err = (x)) < 0) \ + goto fail; \ + } while (0) + +/* Useful for attaching immutable samplers to arrays */ +#define DUP_SAMPLER_ARRAY4(x) (VkSampler []){ x, x, x, x, } + +typedef struct SPIRVShader { + const char *name; /* Name for id/debugging purposes */ + AVBPrint src; + int local_size[3]; /* Compute shader workgroup sizes */ + VkPipelineShaderStageCreateInfo shader; +} SPIRVShader; + +typedef struct VulkanDescriptorSetBinding { + const char *name; + VkDescriptorType type; + const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */ + const char *mem_quali; /* readonly, writeonly, etc. */ + const char *buf_content; /* For buffers */ + uint32_t dimensions; /* Needed for e.g. 
sampler%iD */ + uint32_t elems; /* 0 - scalar, 1 or more - vector */ + VkShaderStageFlags stages; + const VkSampler *samplers; /* Immutable samplers, length - #elems */ + void *updater; /* Pointer to VkDescriptor*Info */ +} VulkanDescriptorSetBinding; + +typedef struct FFVkBuffer { + VkBuffer buf; + VkDeviceMemory mem; + VkMemoryPropertyFlagBits flags; +} FFVkBuffer; + +typedef struct VulkanPipeline { + VkPipelineBindPoint bind_point; + + /* Contexts */ + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + + /* Shaders */ + SPIRVShader **shaders; + int shaders_num; + + /* Push consts */ + VkPushConstantRange *push_consts; + int push_consts_num; + + /* Descriptors */ + VkDescriptorSetLayout *desc_layout; + VkDescriptorPool desc_pool; + VkDescriptorSet *desc_set; + VkDescriptorUpdateTemplate *desc_template; + int descriptor_sets_num; + int pool_size_desc_num; + + /* Temporary, used to store data in between initialization stages */ + VkDescriptorUpdateTemplateCreateInfo *desc_template_info; + VkDescriptorPoolSize *pool_size_desc; +} VulkanPipeline; + +typedef struct FFVkExecContext { + VkCommandPool pool; + VkCommandBuffer buf; + VkQueue queue; + VkFence fence; + + VulkanPipeline *bound_pl; + + VkSemaphore *sem_wait; + int sem_wait_alloc; /* Allocated sem_wait */ + int sem_wait_cnt; + + VkPipelineStageFlagBits *sem_wait_dst; + int sem_wait_dst_alloc; /* Allocated sem_wait_dst */ + + VkSemaphore *sem_sig; + int sem_sig_alloc; /* Allocated sem_sig */ + int sem_sig_cnt; +} FFVkExecContext; + +typedef struct VulkanFilterContext { + const AVClass *class; + + AVBufferRef *device_ref; + AVBufferRef *frames_ref; /* For in-place filtering */ + AVHWDeviceContext *device; + AVVulkanDeviceContext *hwctx; + + /* Properties */ + int output_width; + int output_height; + enum AVPixelFormat output_format; + enum AVPixelFormat input_format; + + /* Samplers */ + VkSampler **samplers; + int samplers_num; + + /* Exec contexts */ + FFVkExecContext **exec_ctx; + int exec_ctx_num; + + 
/* Pipelines (each can have 1 shader of each type) */ + VulkanPipeline **pipelines; + int pipelines_num; + + void *scratch; /* Scratch memory used only in functions */ + unsigned int scratch_size; +} VulkanFilterContext; + +/* Identity mapping - r = r, b = b, g = g, a = a */ +extern const VkComponentMapping ff_comp_identity_map; + +/** + * General lavfi IO functions + */ +int ff_vk_filter_query_formats (AVFilterContext *avctx); +int ff_vk_filter_init (AVFilterContext *avctx); +int ff_vk_filter_config_input (AVFilterLink *inlink); +int ff_vk_filter_config_output (AVFilterLink *outlink); +int ff_vk_filter_config_output_inplace(AVFilterLink *outlink); +void ff_vk_filter_uninit (AVFilterContext *avctx); + +/** + * Converts Vulkan return values to strings + */ +const char *ff_vk_ret2str(VkResult res); + +/** + * Gets the glsl format string for a pixel format + */ +const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); + +/** + * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit() + */ +VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, + VkFilter filt); + +/** + * Create an imageview. + */ +int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img, + VkFormat fmt, const VkComponentMapping map); + +/** + * Destroy an imageview. Command buffer must have completed executing, which + * ff_vk_submit_exec_queue() will ensure + */ +void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v); + +/** + * Define a push constant for a given stage into a pipeline. + * Must be called before the pipeline layout has been initialized. + */ +int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl, + int offset, int size, VkShaderStageFlagBits stage); + +/** + * Inits a pipeline. Everything in it will be auto-freed when calling + * ff_vk_filter_uninit(). + */ +VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx); + +/** + * Inits a shader for a specific pipeline. 
Will be auto-freed on uninit. + */ +SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl, + const char *name, VkShaderStageFlags stage); + +/** + * Writes the workgroup size for a shader. + */ +void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd, + int local_size[3]); + +/** + * Adds a descriptor set to the shader and registers them in the pipeline. + */ +int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, + SPIRVShader *shd, VulkanDescriptorSetBinding *desc, + int num, int only_print_to_shader); + +/** + * Compiles the shader, entrypoint must be set to "main". + */ +int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd, + const char *entrypoint); + +/** + * Initializes the pipeline layout after all shaders and descriptor sets have + * been finished. + */ +int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl); + +/** + * Initializes a compute pipeline. Will pick the first shader with the + * COMPUTE flag set. + */ +int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl); + +/** + * Updates a descriptor set via the updaters defined. + * Can be called immediately after pipeline creation, but must be called + * at least once before queue submission. + */ +void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, + int set_id); + +/** + * Init an execution context for command recording and queue submission. + * Will be auto-freed on uninit. + */ +int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue); + +/** + * Begin recording to the command buffer. Previous execution must have been + * completed, which ff_vk_submit_exec_queue() will ensure. + */ +int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e); + +/** + * Add a command to bind the completed pipeline and its descriptor sets. + * Must be called after ff_vk_start_exec_recording() and before submission. 
+ */ +void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e, + VulkanPipeline *pl); + +/** + * Updates push constants. + * Must be called after binding a pipeline if any push constants were defined. + */ +void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e, + VkShaderStageFlagBits stage, int offset, + size_t size, void *src); + +/** + * Adds a frame as a queue dependency. This manages semaphore signalling. + * Must be called before submission. + */ +int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e, + AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag); + +/** + * Submits a command buffer to the queue for execution. + * Will block until execution has finished in order to simplify resource + * management. + */ +int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e); + +/** + * Create a VkBuffer with the specified parameters. + */ +int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); + +/** + * Maps the buffer to userspace. Set invalidate to 1 if reading the contents + * is necessary. + */ +int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[], + int nb_buffers, int invalidate); + +/** + * Unmaps the buffer from userspace. Set flush to 1 to write and sync. + */ +int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers, + int flush); + +/** + * Frees a buffer. + */ +void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf); + +#endif /* AVFILTER_VULKAN_COMMON_H */ From 30df3d4a38313c2d490f89b9a313671cd81aa9a6 Mon Sep 17 00:00:00 2001 From: Lynne Date: Sun, 27 Oct 2019 14:45:36 +0000 Subject: [PATCH 5/9] lavfi: add an scale_vulkan filter This commit adds a basic, non-converting Vulkan scaling filter. 
--- configure | 1 + libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/vf_scale_vulkan.c | 352 ++++++++++++++++++++++++++++++++++ 4 files changed, 355 insertions(+) create mode 100644 libavfilter/vf_scale_vulkan.c diff --git a/configure b/configure index 43a615c20ab4f..6da9cdf4a4fc0 100755 --- a/configure +++ b/configure @@ -3598,6 +3598,7 @@ zmq_filter_deps="libzmq" zoompan_filter_deps="swscale" zscale_filter_deps="libzimg const_nan" scale_vaapi_filter_deps="vaapi" +scale_vulkan_filter_deps="vulkan libglslang" vpp_qsv_filter_deps="libmfx" vpp_qsv_filter_select="qsvvpp" yadif_cuda_filter_deps="ffnvcodec" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index af541a9b8dd29..ee3fc1ac11b53 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -364,6 +364,7 @@ OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.pt OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o +OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o vulkan.o OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale_eval.o OBJS-$(CONFIG_SCROLL_FILTER) += vf_scroll.o OBJS-$(CONFIG_SELECT_FILTER) += f_select.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index c295f8e403575..eb9e2dd0b7b67 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -346,6 +346,7 @@ extern AVFilter ff_vf_scale_cuda; extern AVFilter ff_vf_scale_npp; extern AVFilter ff_vf_scale_qsv; extern AVFilter ff_vf_scale_vaapi; +extern AVFilter ff_vf_scale_vulkan; extern AVFilter ff_vf_scale2ref; extern AVFilter ff_vf_scroll; extern AVFilter ff_vf_select; diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c new file mode 100644 index 0000000000000..1534f2d716113 --- /dev/null +++ b/libavfilter/vf_scale_vulkan.c @@ -0,0 +1,352 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "vulkan.h" +#include "scale_eval.h" +#include "internal.h" + +#define CGROUPS (int [3]){ 32, 32, 1 } + +enum ScalerFunc { + F_BILINEAR = 0, + F_NEAREST, + + F_NB, +}; + +typedef struct ScaleVulkanContext { + VulkanFilterContext vkctx; + + int initialized; + FFVkExecContext *exec; + VulkanPipeline *pl; + + /* Shader updators, must be in the main filter struct */ + VkDescriptorImageInfo input_images[3]; + VkDescriptorImageInfo output_images[3]; + + enum ScalerFunc scaler; + char *output_format_string; + char *w_expr; + char *h_expr; +} ScaleVulkanContext; + +static const char scale_bilinear[] = { + C(0, void scale_bilinear(int idx, ivec2 pos) ) + C(0, { ) + C(1, const vec2 npos = (vec2(pos) + 0.5f) / imageSize(output_img[idx]); ) + C(1, imageStore(output_img[idx], pos, texture(input_img[idx], npos)); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) +{ + int err; + VkSampler *sampler; + VkFilter sampler_mode; + ScaleVulkanContext *s = ctx->priv; + + switch (s->scaler) { + case F_NEAREST: + sampler_mode = VK_FILTER_NEAREST; + break; + case F_BILINEAR: + sampler_mode = VK_FILTER_LINEAR; + break; + }; + + /* Create a sampler */ + sampler = ff_vk_init_sampler(ctx, 0, 
sampler_mode); + if (!sampler) + return AVERROR_EXTERNAL; + + s->pl = ff_vk_create_pipeline(ctx); + if (!s->pl) + return AVERROR(ENOMEM); + + { /* Create the shader */ + VulkanDescriptorSetBinding desc_i[2] = { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.input_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->input_images, + .samplers = DUP_SAMPLER_ARRAY4(*sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.output_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->output_images, + }, + }; + + SPIRVShader *shd = ff_vk_init_shader(ctx, s->pl, "scale_compute", + VK_SHADER_STAGE_COMPUTE_BIT); + if (!shd) + return AVERROR(ENOMEM); + + ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS); + + RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, desc_i, 2, 0)); /* set 0 */ + + GLSLD( scale_bilinear ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + + for (int i = 0; i < desc_i[1].elems; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) ); + switch (s->scaler) { + case F_NEAREST: + case F_BILINEAR: + GLSLF(2, scale_bilinear(%i, pos); ,i); + break; + }; + } + + GLSLC(0, } ); + + RET(ff_vk_compile_shader(ctx, shd, "main")); + } + + RET(ff_vk_init_pipeline_layout(ctx, s->pl)); + RET(ff_vk_init_compute_pipeline(ctx, s->pl)); + + /* Execution context */ + RET(ff_vk_create_exec_ctx(ctx, &s->exec, + s->vkctx.hwctx->queue_family_comp_index)); + + s->initialized = 1; + + return 0; + +fail: + return err; +} + +static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) +{ + int err = 0; + ScaleVulkanContext *s = 
avctx->priv; + AVVkFrame *in = (AVVkFrame *)in_f->data[0]; + AVVkFrame *out = (AVVkFrame *)out_f->data[0]; + int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + for (int i = 0; i < planes; i++) { + RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i], + av_vkfmt_from_pixfmt(s->vkctx.input_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i], + av_vkfmt_from_pixfmt(s->vkctx.output_format)[i], + ff_comp_identity_map)); + + s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + ff_vk_update_descriptor_set(avctx, s->pl, 0); + + ff_vk_start_exec_recording(avctx, s->exec); + + for (int i = 0; i < planes; i++) { + VkImageMemoryBarrier bar[2] = { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = in->layout[i], + .newLayout = s->input_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = in->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .oldLayout = out->layout[i], + .newLayout = s->output_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = out->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + }; + + vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, + 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); + + in->layout[i] = bar[0].newLayout; + in->access[i] = 
bar[0].dstAccessMask; + + out->layout[i] = bar[1].newLayout; + out->access[i] = bar[1].dstAccessMask; + } + + ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl); + + vkCmdDispatch(s->exec->buf, + FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], + FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); + + ff_vk_add_exec_dep(avctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + ff_vk_add_exec_dep(avctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + + err = ff_vk_submit_exec_queue(avctx, s->exec); + if (err) + return err; + + for (int i = 0; i < planes; i++) { + ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView); + ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView); + } + +fail: + return err; +} + +static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFilterContext *ctx = link->dst; + ScaleVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in)); + + RET(process_frames(ctx, out, in)); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static int scale_vulkan_config_output(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + ScaleVulkanContext *s = avctx->priv; + AVFilterLink *inlink = outlink->src->inputs[0]; + + err = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink, + &s->vkctx.output_width, + &s->vkctx.output_height); + if (err < 0) + return err; + + s->vkctx.output_format = s->vkctx.input_format; + + err = ff_vk_filter_config_output(outlink); + if (err < 0) + return err; + + if (inlink->sample_aspect_ratio.num) + outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, 
outlink->w * inlink->h}, inlink->sample_aspect_ratio); + else + outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + + return 0; +} + +static void scale_vulkan_uninit(AVFilterContext *avctx) +{ + ScaleVulkanContext *s = avctx->priv; + + ff_vk_filter_uninit(avctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(ScaleVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption scale_vulkan_options[] = { + { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, + { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, + { "scaler", "Scaler function", OFFSET(scaler), AV_OPT_TYPE_INT, {.i64 = F_BILINEAR}, 0, F_NB, .flags = FLAGS, "scaler" }, + { "bilinear", "Bilinear interpolation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = F_BILINEAR}, 0, 0, .flags = FLAGS, "scaler" }, + { "nearest", "Nearest (useful for pixel art)", 0, AV_OPT_TYPE_CONST, {.i64 = F_NEAREST}, 0, 0, .flags = FLAGS, "scaler" }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(scale_vulkan); + +static const AVFilterPad scale_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &scale_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, + { NULL } +}; + +static const AVFilterPad scale_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &scale_vulkan_config_output, + }, + { NULL } +}; + +AVFilter ff_vf_scale_vulkan = { + .name = "scale_vulkan", + .description = NULL_IF_CONFIG_SMALL("Scale Vulkan frames"), + .priv_size = sizeof(ScaleVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &scale_vulkan_uninit, + .query_formats = &ff_vk_filter_query_formats, + .inputs = scale_vulkan_inputs, + .outputs = scale_vulkan_outputs, + .priv_class = &scale_vulkan_class, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; From bb1469f0b0696283ee85e5c6019d0a1bf7fd3603 Mon Sep 
17 00:00:00 2001 From: Lynne Date: Sun, 27 Oct 2019 14:46:16 +0000 Subject: [PATCH 6/9] lavfi: add an overlay_vulkan filter This commit adds a basic, non-converting overlay filter for Vulkan. --- configure | 1 + libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/vf_overlay_vulkan.c | 463 ++++++++++++++++++++++++++++++++ 4 files changed, 466 insertions(+) create mode 100644 libavfilter/vf_overlay_vulkan.c diff --git a/configure b/configure index 6da9cdf4a4fc0..ec11b11301775 100755 --- a/configure +++ b/configure @@ -3531,6 +3531,7 @@ openclsrc_filter_deps="opencl" overlay_opencl_filter_deps="opencl" overlay_qsv_filter_deps="libmfx" overlay_qsv_filter_select="qsvvpp" +overlay_vulkan_filter_deps="vulkan libglslang" owdenoise_filter_deps="gpl" pan_filter_deps="swresample" perspective_filter_deps="gpl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index ee3fc1ac11b53..54a096cdc6a9b 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -322,6 +322,7 @@ OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o framesync.o OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \ opencl/overlay.o framesync.o OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o +OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index eb9e2dd0b7b67..97fbb9342238b 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -306,6 +306,7 @@ extern AVFilter ff_vf_oscilloscope; extern AVFilter ff_vf_overlay; extern AVFilter ff_vf_overlay_opencl; extern AVFilter ff_vf_overlay_qsv; +extern AVFilter ff_vf_overlay_vulkan; extern AVFilter ff_vf_owdenoise; extern AVFilter ff_vf_pad; extern AVFilter ff_vf_palettegen; diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c new file mode 100644 
index 0000000000000..7cedcc6e8885c --- /dev/null +++ b/libavfilter/vf_overlay_vulkan.c @@ -0,0 +1,463 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "vulkan.h" +#include "internal.h" +#include "framesync.h" + +#define CGROUPS (int [3]){ 32, 32, 1 } + +typedef struct OverlayVulkanContext { + VulkanFilterContext vkctx; + + int initialized; + VulkanPipeline *pl; + FFVkExecContext *exec; + FFFrameSync fs; + FFVkBuffer params_buf; + + /* Shader updators, must be in the main filter struct */ + VkDescriptorImageInfo main_images[3]; + VkDescriptorImageInfo overlay_images[3]; + VkDescriptorImageInfo output_images[3]; + VkDescriptorBufferInfo params_desc; + + int overlay_x; + int overlay_y; + int overlay_w; + int overlay_h; +} OverlayVulkanContext; + +static const char overlay_noalpha[] = { + C(0, void overlay_noalpha(int i, ivec2 pos) ) + C(0, { ) + C(1, if ((o_offset[i].x <= pos.x) && (o_offset[i].y <= pos.y) && + (pos.x < (o_offset[i].x + o_size[i].x)) && + (pos.y < (o_offset[i].y + o_size[i].y))) { ) + C(2, vec4 res = texture(overlay_img[i], pos - o_offset[i]); ) + C(2, imageStore(output_img[i], pos, res); ) + C(1, } else { ) + C(2, vec4 res = texture(main_img[i], pos); ) + C(2, imageStore(output_img[i], pos, 
res); ) + C(1, } ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int err; + OverlayVulkanContext *s = ctx->priv; + VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR); + if (!sampler) + return AVERROR_EXTERNAL; + + s->pl = ff_vk_create_pipeline(ctx); + if (!s->pl) + return AVERROR(ENOMEM); + + { /* Create the shader */ + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + VulkanDescriptorSetBinding desc_i[3] = { + { + .name = "main_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->main_images, + .samplers = DUP_SAMPLER_ARRAY4(*sampler), + }, + { + .name = "overlay_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->overlay_images, + .samplers = DUP_SAMPLER_ARRAY4(*sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->output_images, + }, + }; + + VulkanDescriptorSetBinding desc_b = { + .name = "params", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .mem_layout = "std430", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = &s->params_desc, + .buf_content = "ivec2 o_offset[3], o_size[3];", + }; + + SPIRVShader *shd = ff_vk_init_shader(ctx, s->pl, "overlay_compute", + VK_SHADER_STAGE_COMPUTE_BIT); + if (!shd) + return AVERROR(ENOMEM); + + ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS); + + RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, desc_i, 3, 0)); /* set 0 */ + RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */ + + GLSLD( overlay_noalpha ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = 
ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, int planes = %i; ,planes); + GLSLC(1, for (int i = 0; i < planes; i++) { ); + GLSLC(2, overlay_noalpha(i, pos); ); + GLSLC(1, } ); + GLSLC(0, } ); + + RET(ff_vk_compile_shader(ctx, shd, "main")); + } + + RET(ff_vk_init_pipeline_layout(ctx, s->pl)); + RET(ff_vk_init_compute_pipeline(ctx, s->pl)); + + { /* Create and update buffer */ + const AVPixFmtDescriptor *desc; + + /* NOTE: std430 requires the same identical struct layout, padding and + * alignment as C, so we're allowed to do this, as this will map + * exactly to what the shader recieves */ + struct { + int32_t o_offset[2*3]; + int32_t o_size[2*3]; + } *par; + + err = ff_vk_create_buf(ctx, &s->params_buf, + sizeof(*par), + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err) + return err; + + err = ff_vk_map_buffers(ctx, &s->params_buf, (uint8_t **)&par, 1, 0); + if (err) + return err; + + desc = av_pix_fmt_desc_get(s->vkctx.output_format); + + par->o_offset[0] = s->overlay_x; + par->o_offset[1] = s->overlay_y; + par->o_offset[2] = par->o_offset[0] >> desc->log2_chroma_w; + par->o_offset[3] = par->o_offset[1] >> desc->log2_chroma_h; + par->o_offset[4] = par->o_offset[0] >> desc->log2_chroma_w; + par->o_offset[5] = par->o_offset[1] >> desc->log2_chroma_h; + + par->o_size[0] = s->overlay_w; + par->o_size[1] = s->overlay_h; + par->o_size[2] = par->o_size[0] >> desc->log2_chroma_w; + par->o_size[3] = par->o_size[1] >> desc->log2_chroma_h; + par->o_size[4] = par->o_size[0] >> desc->log2_chroma_w; + par->o_size[5] = par->o_size[1] >> desc->log2_chroma_h; + + err = ff_vk_unmap_buffers(ctx, &s->params_buf, 1, 1); + if (err) + return err; + + s->params_desc.buffer = s->params_buf.buf; + s->params_desc.range = VK_WHOLE_SIZE; + + ff_vk_update_descriptor_set(ctx, s->pl, 1); + } + + /* Execution context */ + RET(ff_vk_create_exec_ctx(ctx, &s->exec, + s->vkctx.hwctx->queue_family_comp_index)); + + s->initialized = 1; + + return 0; + +fail: + 
return err; +} + +static int process_frames(AVFilterContext *avctx, AVFrame *out_f, + AVFrame *main_f, AVFrame *overlay_f) +{ + int err; + OverlayVulkanContext *s = avctx->priv; + int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + AVVkFrame *out = (AVVkFrame *)out_f->data[0]; + AVVkFrame *main = (AVVkFrame *)main_f->data[0]; + AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0]; + + AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data; + AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data; + + for (int i = 0; i < planes; i++) { + RET(ff_vk_create_imageview(avctx, &s->main_images[i].imageView, main->img[i], + av_vkfmt_from_pixfmt(main_fc->sw_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->overlay_images[i].imageView, overlay->img[i], + av_vkfmt_from_pixfmt(overlay_fc->sw_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i], + av_vkfmt_from_pixfmt(s->vkctx.output_format)[i], + ff_comp_identity_map)); + + s->main_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + ff_vk_update_descriptor_set(avctx, s->pl, 0); + + ff_vk_start_exec_recording(avctx, s->exec); + + for (int i = 0; i < planes; i++) { + VkImageMemoryBarrier bar[3] = { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = main->layout[i], + .newLayout = s->main_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = main->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + 
.srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = overlay->layout[i], + .newLayout = s->overlay_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = overlay->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .oldLayout = out->layout[i], + .newLayout = s->output_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = out->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + }; + + vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, + 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); + + main->layout[i] = bar[0].newLayout; + main->access[i] = bar[0].dstAccessMask; + + overlay->layout[i] = bar[1].newLayout; + overlay->access[i] = bar[1].dstAccessMask; + + out->layout[i] = bar[2].newLayout; + out->access[i] = bar[2].dstAccessMask; + } + + ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl); + + vkCmdDispatch(s->exec->buf, + FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], + FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); + + ff_vk_add_exec_dep(avctx, s->exec, main_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + ff_vk_add_exec_dep(avctx, s->exec, overlay_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + ff_vk_add_exec_dep(avctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + + err = ff_vk_submit_exec_queue(avctx, s->exec); + if (err) + return err; + +fail: + + for (int i = 0; i < planes; i++) { + ff_vk_destroy_imageview(avctx, &s->main_images[i].imageView); + ff_vk_destroy_imageview(avctx, 
&s->overlay_images[i].imageView); + ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView); + } + + return err; +} + +static int overlay_vulkan_blend(FFFrameSync *fs) +{ + int err; + AVFilterContext *ctx = fs->parent; + OverlayVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *input_main, *input_overlay, *out; + + err = ff_framesync_get_frame(fs, 0, &input_main, 0); + if (err < 0) + goto fail; + err = ff_framesync_get_frame(fs, 1, &input_overlay, 0); + if (err < 0) + goto fail; + + if (!input_main || !input_overlay) + return 0; + + if (!s->initialized) { + AVHWFramesContext *main_fc = (AVHWFramesContext*)input_main->hw_frames_ctx->data; + AVHWFramesContext *overlay_fc = (AVHWFramesContext*)input_overlay->hw_frames_ctx->data; + if (main_fc->sw_format != overlay_fc->sw_format) { + av_log(ctx, AV_LOG_ERROR, "Mismatching sw formats!\n"); + return AVERROR(EINVAL); + } + + s->overlay_w = input_overlay->width; + s->overlay_h = input_overlay->height; + + RET(init_filter(ctx)); + } + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + RET(process_frames(ctx, out, input_main, input_overlay)); + + err = av_frame_copy_props(out, input_main); + if (err < 0) + goto fail; + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&out); + return err; +} + +static int overlay_vulkan_config_output(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + OverlayVulkanContext *s = avctx->priv; + + err = ff_vk_filter_config_output(outlink); + if (err < 0) + return err; + + err = ff_framesync_init_dualinput(&s->fs, avctx); + if (err < 0) + return err; + + return ff_framesync_configure(&s->fs); +} + +static int overlay_vulkan_activate(AVFilterContext *avctx) +{ + OverlayVulkanContext *s = avctx->priv; + + return ff_framesync_activate(&s->fs); +} + +static av_cold int overlay_vulkan_init(AVFilterContext *avctx) +{ + OverlayVulkanContext *s = 
avctx->priv; + + s->fs.on_event = &overlay_vulkan_blend; + + return ff_vk_filter_init(avctx); +} + +static void overlay_vulkan_uninit(AVFilterContext *avctx) +{ + OverlayVulkanContext *s = avctx->priv; + + ff_vk_filter_uninit(avctx); + ff_framesync_uninit(&s->fs); + + ff_vk_free_buf(avctx, &s->params_buf); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(OverlayVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption overlay_vulkan_options[] = { + { "x", "Set horizontal offset", OFFSET(overlay_x), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, + { "y", "Set vertical offset", OFFSET(overlay_y), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(overlay_vulkan); + +static const AVFilterPad overlay_vulkan_inputs[] = { + { + .name = "main", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_input, + }, + { + .name = "overlay", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_input, + }, + { NULL } +}; + +static const AVFilterPad overlay_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &overlay_vulkan_config_output, + }, + { NULL } +}; + +AVFilter ff_vf_overlay_vulkan = { + .name = "overlay_vulkan", + .description = NULL_IF_CONFIG_SMALL("Overlay a source on top of another"), + .priv_size = sizeof(OverlayVulkanContext), + .init = &overlay_vulkan_init, + .uninit = &overlay_vulkan_uninit, + .query_formats = &ff_vk_filter_query_formats, + .activate = &overlay_vulkan_activate, + .inputs = overlay_vulkan_inputs, + .outputs = overlay_vulkan_outputs, + .priv_class = &overlay_vulkan_class, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; From cf275888d1fd9c568e62fbca7163f138306bcbae Mon Sep 17 00:00:00 2001 From: Lynne Date: Sun, 27 Oct 2019 14:47:18 +0000 Subject: [PATCH 7/9] lavfi: add an avgblur_vulkan filter This commit adds a fast avgblur Vulkan filter. 
This will reset Intel GPUs on Windows due to a known, year-old driver bug. --- configure | 1 + libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/vf_avgblur_vulkan.c | 406 ++++++++++++++++++++++++++++++++ 4 files changed, 409 insertions(+) create mode 100644 libavfilter/vf_avgblur_vulkan.c diff --git a/configure b/configure index ec11b11301775..d581c64f762f8 100755 --- a/configure +++ b/configure @@ -3460,6 +3460,7 @@ ass_filter_deps="libass" atempo_filter_deps="avcodec" atempo_filter_select="rdft" avgblur_opencl_filter_deps="opencl" +avgblur_vulkan_filter_deps="vulkan libglslang" azmq_filter_deps="libzmq" blackframe_filter_deps="gpl" bm3d_filter_deps="avcodec" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 54a096cdc6a9b..68baa368bc715 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -163,6 +163,7 @@ OBJS-$(CONFIG_ATADENOISE_FILTER) += vf_atadenoise.o OBJS-$(CONFIG_AVGBLUR_FILTER) += vf_avgblur.o OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ opencl/avgblur.o boxblur.o +OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vf_avgblur_vulkan.o vulkan.o OBJS-$(CONFIG_BBOX_FILTER) += bbox.o vf_bbox.o OBJS-$(CONFIG_BENCH_FILTER) += f_bench.o OBJS-$(CONFIG_BILATERAL_FILTER) += vf_bilateral.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 97fbb9342238b..6e1abb31bbb99 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -154,6 +154,7 @@ extern AVFilter ff_vf_ass; extern AVFilter ff_vf_atadenoise; extern AVFilter ff_vf_avgblur; extern AVFilter ff_vf_avgblur_opencl; +extern AVFilter ff_vf_avgblur_vulkan; extern AVFilter ff_vf_bbox; extern AVFilter ff_vf_bench; extern AVFilter ff_vf_bilateral; diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c new file mode 100644 index 0000000000000..7435b0434a726 --- /dev/null +++ b/libavfilter/vf_avgblur_vulkan.c @@ -0,0 +1,406 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "vulkan.h" +#include "internal.h" + +#define CGS 32 + +typedef struct AvgBlurVulkanContext { + VulkanFilterContext vkctx; + + int initialized; + FFVkExecContext *exec; + VulkanPipeline *pl_hor; + VulkanPipeline *pl_ver; + + /* Shader updators, must be in the main filter struct */ + VkDescriptorImageInfo input_images[3]; + VkDescriptorImageInfo tmp_images[3]; + VkDescriptorImageInfo output_images[3]; + + int size_x; + int size_y; + int planes; +} AvgBlurVulkanContext; + +static const char blur_kernel[] = { + C(0, shared vec4 cache[DIR(gl_WorkGroupSize) + FILTER_RADIUS*2]; ) + C(0, ) + C(0, void distort(const ivec2 pos, const int idx) ) + C(0, { ) + C(1, const uint cp = DIR(gl_LocalInvocationID) + FILTER_RADIUS; ) + C(0, ) + C(1, cache[cp] = texture(input_img[idx], pos); ) + C(0, ) + C(1, const ivec2 loc_l = pos - INC(FILTER_RADIUS); ) + C(1, cache[cp - FILTER_RADIUS] = texture(input_img[idx], loc_l); ) + C(0, ) + C(1, const ivec2 loc_h = pos + INC(DIR(gl_WorkGroupSize)); ) + C(1, cache[cp + DIR(gl_WorkGroupSize)] = texture(input_img[idx], loc_h); ) + C(0, ) + C(1, barrier(); ) + C(0, ) + C(1, vec4 sum = vec4(0); ) + C(1, for (int p = -FILTER_RADIUS; p <= FILTER_RADIUS; p++) ) + C(2, sum += cache[cp 
+ p]; ) + C(0, ) + C(1, sum /= vec4(FILTER_RADIUS*2 + 1); ) + C(1, imageStore(output_img[idx], pos, sum); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) +{ + int err; + SPIRVShader *shd; + AvgBlurVulkanContext *s = ctx->priv; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR); + + VulkanDescriptorSetBinding desc_i[2] = { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER_ARRAY4(*sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + if (!sampler) + return AVERROR_EXTERNAL; + + { /* Create shader for the horizontal pass */ + desc_i[0].updater = s->input_images; + desc_i[1].updater = s->tmp_images; + + s->pl_hor = ff_vk_create_pipeline(ctx); + if (!s->pl_hor) + return AVERROR(ENOMEM); + + shd = ff_vk_init_shader(ctx, s->pl_hor, "avgblur_compute_hor", + VK_SHADER_STAGE_COMPUTE_BIT); + + ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ CGS, 1, 1 }); + + RET(ff_vk_add_descriptor_set(ctx, s->pl_hor, shd, desc_i, 2, 0)); + + GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_x); + GLSLC(0, #define INC(x) (ivec2(x, 0)) ); + GLSLC(0, #define DIR(var) (var.x) ); + GLSLD( blur_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + if (s->planes & (1 << i)) { + GLSLF(2, distort(pos, %i); ,i); + } else { + GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); + GLSLF(2, 
imageStore(output_img[%i], pos, res); ,i); + } + GLSLC(1, } ); + } + GLSLC(0, } ); + + RET(ff_vk_compile_shader(ctx, shd, "main")); + + RET(ff_vk_init_pipeline_layout(ctx, s->pl_hor)); + RET(ff_vk_init_compute_pipeline(ctx, s->pl_hor)); + } + + { /* Create shader for the vertical pass */ + desc_i[0].updater = s->tmp_images; + desc_i[1].updater = s->output_images; + + s->pl_ver = ff_vk_create_pipeline(ctx); + if (!s->pl_ver) + return AVERROR(ENOMEM); + + shd = ff_vk_init_shader(ctx, s->pl_ver, "avgblur_compute_ver", + VK_SHADER_STAGE_COMPUTE_BIT); + + ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 1, CGS, 1 }); + + RET(ff_vk_add_descriptor_set(ctx, s->pl_ver, shd, desc_i, 2, 0)); + + GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_y); + GLSLC(0, #define INC(x) (ivec2(0, x)) ); + GLSLC(0, #define DIR(var) (var.y) ); + GLSLD( blur_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + if (s->planes & (1 << i)) { + GLSLF(2, distort(pos, %i); ,i); + } else { + GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); + } + GLSLC(1, } ); + } + GLSLC(0, } ); + + RET(ff_vk_compile_shader(ctx, shd, "main")); + + RET(ff_vk_init_pipeline_layout(ctx, s->pl_ver)); + RET(ff_vk_init_compute_pipeline(ctx, s->pl_ver)); + } + + /* Execution context */ + RET(ff_vk_create_exec_ctx(ctx, &s->exec, + s->vkctx.hwctx->queue_family_comp_index)); + + s->initialized = 1; + + return 0; + +fail: + return err; +} + +static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f, AVFrame *in_f) +{ + int err; + AvgBlurVulkanContext *s = avctx->priv; + AVVkFrame *in = (AVVkFrame *)in_f->data[0]; + AVVkFrame *tmp = (AVVkFrame *)tmp_f->data[0]; + AVVkFrame *out = (AVVkFrame *)out_f->data[0]; + int 
planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + for (int i = 0; i < planes; i++) { + RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i], + av_vkfmt_from_pixfmt(s->vkctx.input_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->tmp_images[i].imageView, tmp->img[i], + av_vkfmt_from_pixfmt(s->vkctx.output_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i], + av_vkfmt_from_pixfmt(s->vkctx.output_format)[i], + ff_comp_identity_map)); + + s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + ff_vk_update_descriptor_set(avctx, s->pl_hor, 0); + ff_vk_update_descriptor_set(avctx, s->pl_ver, 0); + + ff_vk_start_exec_recording(avctx, s->exec); + + for (int i = 0; i < planes; i++) { + VkImageMemoryBarrier bar[] = { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = in->layout[i], + .newLayout = s->input_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = in->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, + .oldLayout = tmp->layout[i], + .newLayout = s->tmp_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = tmp->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = 
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .oldLayout = out->layout[i], + .newLayout = s->output_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = out->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + }; + + vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, + 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); + + in->layout[i] = bar[0].newLayout; + in->access[i] = bar[0].dstAccessMask; + + tmp->layout[i] = bar[1].newLayout; + tmp->access[i] = bar[1].dstAccessMask; + + out->layout[i] = bar[2].newLayout; + out->access[i] = bar[2].dstAccessMask; + } + + ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl_hor); + + vkCmdDispatch(s->exec->buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, + s->vkctx.output_height, 1); + + ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl_ver); + + vkCmdDispatch(s->exec->buf, s->vkctx.output_width, + FFALIGN(s->vkctx.output_height, CGS)/CGS, 1); + + ff_vk_add_exec_dep(avctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + ff_vk_add_exec_dep(avctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + + err = ff_vk_submit_exec_queue(avctx, s->exec); + if (err) + return err; + +fail: + + for (int i = 0; i < planes; i++) { + ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView); + ff_vk_destroy_imageview(avctx, &s->tmp_images[i].imageView); + ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView); + } + + return err; +} + +static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFrame *tmp = NULL, *out = NULL; + AVFilterContext *ctx = link->dst; + AvgBlurVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if 
(!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in)); + + RET(process_frames(ctx, out, tmp, in)); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + av_frame_free(&tmp); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&tmp); + av_frame_free(&out); + return err; +} + +static void avgblur_vulkan_uninit(AVFilterContext *avctx) +{ + AvgBlurVulkanContext *s = avctx->priv; + + ff_vk_filter_uninit(avctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(AvgBlurVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption avgblur_vulkan_options[] = { + { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, {.i64 = 2}, 0, 32, .flags = FLAGS }, + { "planes", "Set planes to filter (bitmask)", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, .flags = FLAGS }, + { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, {.i64 = 2}, 0, 32, .flags = FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(avgblur_vulkan); + +static const AVFilterPad avgblur_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &avgblur_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, + { NULL } +}; + +static const AVFilterPad avgblur_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + }, + { NULL } +}; + +AVFilter ff_vf_avgblur_vulkan = { + .name = "avgblur_vulkan", + .description = NULL_IF_CONFIG_SMALL("Apply avgblur mask to input video"), + .priv_size = sizeof(AvgBlurVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &avgblur_vulkan_uninit, + .query_formats = &ff_vk_filter_query_formats, + .inputs = avgblur_vulkan_inputs, + 
.outputs = avgblur_vulkan_outputs, + .priv_class = &avgblur_vulkan_class, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; From 347811f64cb4cc3790bcd162e604ab0f425d56c1 Mon Sep 17 00:00:00 2001 From: Lynne Date: Sun, 27 Oct 2019 14:48:16 +0000 Subject: [PATCH 8/9] lavfi: add an chromaber_vulkan filter This commit adds a chromatic aberration filter for Vulkan that attempts to emulate a lens chromatic aberration effect. For a YUV frame it will instead shift chroma slightly, providing an approximation to what would otherwise be an accurate emulation. --- configure | 1 + libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/vf_chromaber_vulkan.c | 340 ++++++++++++++++++++++++++++++ 4 files changed, 343 insertions(+) create mode 100644 libavfilter/vf_chromaber_vulkan.c diff --git a/configure b/configure index d581c64f762f8..944d832650d8e 100755 --- a/configure +++ b/configure @@ -3468,6 +3468,7 @@ bm3d_filter_select="dct" boxblur_filter_deps="gpl" boxblur_opencl_filter_deps="opencl gpl" bs2b_filter_deps="libbs2b" +chromaber_vulkan_filter_deps="vulkan libglslang" colorkey_opencl_filter_deps="opencl" colormatrix_filter_deps="gpl" convolution_opencl_filter_deps="opencl" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 68baa368bc715..d2e2d496627af 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -176,6 +176,7 @@ OBJS-$(CONFIG_BOXBLUR_FILTER) += vf_boxblur.o boxblur.o OBJS-$(CONFIG_BOXBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ opencl/avgblur.o boxblur.o OBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o yadif_common.o +OBJS-$(CONFIG_CHROMABER_VULKAN_FILTER) += vf_chromaber_vulkan.o vulkan.o OBJS-$(CONFIG_CHROMAHOLD_FILTER) += vf_chromakey.o OBJS-$(CONFIG_CHROMAKEY_FILTER) += vf_chromakey.o OBJS-$(CONFIG_CHROMASHIFT_FILTER) += vf_chromashift.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 6e1abb31bbb99..b806e275ed478 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -88,6 +88,7 
@@ extern AVFilter ff_af_bandreject; extern AVFilter ff_af_bass; extern AVFilter ff_af_biquad; extern AVFilter ff_af_bs2b; +extern AVFilter ff_vf_chromaber_vulkan; extern AVFilter ff_af_channelmap; extern AVFilter ff_af_channelsplit; extern AVFilter ff_af_chorus; diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c new file mode 100644 index 0000000000000..673b3a7a68193 --- /dev/null +++ b/libavfilter/vf_chromaber_vulkan.c @@ -0,0 +1,340 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "vulkan.h" +#include "internal.h" + +#define CGROUPS (int [3]){ 32, 32, 1 } + +typedef struct ChromaticAberrationVulkanContext { + VulkanFilterContext vkctx; + + int initialized; + FFVkExecContext *exec; + VulkanPipeline *pl; + + /* Shader updators, must be in the main filter struct */ + VkDescriptorImageInfo input_images[3]; + VkDescriptorImageInfo output_images[3]; + + /* Push constants / options */ + struct { + float dist[2]; + } opts; +} ChromaticAberrationVulkanContext; + +static const char distort_chroma_kernel[] = { + C(0, void distort_rgb(ivec2 size, ivec2 pos) ) + C(0, { ) + C(1, const vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; ) + C(1, const vec2 o = p * (dist - 1.0f); ) + C(0, ) + C(1, vec4 res; ) + C(1, res.r = texture(input_img[0], ((p - o)/2.0f) + 0.5f).r; ) + C(1, res.g = texture(input_img[0], ((p )/2.0f) + 0.5f).g; ) + C(1, res.b = texture(input_img[0], ((p + o)/2.0f) + 0.5f).b; ) + C(1, res.a = texture(input_img[0], ((p )/2.0f) + 0.5f).a; ) + C(1, imageStore(output_img[0], pos, res); ) + C(0, } ) + C(0, ) + C(0, void distort_chroma(int idx, ivec2 size, ivec2 pos) ) + C(0, { ) + C(1, vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; ) + C(1, float d = sqrt(p.x*p.x + p.y*p.y); ) + C(1, p *= d / (d* dist); ) + C(1, vec4 res = texture(input_img[idx], (p/2.0f) + 0.5f); ) + C(1, imageStore(output_img[idx], pos, res); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) +{ + int err; + ChromaticAberrationVulkanContext *s = ctx->priv; + + /* Create a sampler */ + VkSampler *sampler = ff_vk_init_sampler(ctx, 0, VK_FILTER_LINEAR); + if (!sampler) + return AVERROR_EXTERNAL; + + s->pl = ff_vk_create_pipeline(ctx); + if (!s->pl) + return AVERROR(ENOMEM); + + /* 
Normalize options */ + s->opts.dist[0] = (s->opts.dist[0] / 100.0f) + 1.0f; + s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f; + + { /* Create the shader */ + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + VulkanDescriptorSetBinding desc_i[2] = { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->input_images, + .samplers = DUP_SAMPLER_ARRAY4(*sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->output_images, + }, + }; + + SPIRVShader *shd = ff_vk_init_shader(ctx, s->pl, "chromaber_compute", + VK_SHADER_STAGE_COMPUTE_BIT); + if (!shd) + return AVERROR(ENOMEM); + + ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, vec2 dist; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(ctx, s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, desc_i, 2, 0)); /* set 0 */ + + GLSLD( distort_chroma_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + if (planes == 1) { + GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); ); + } else { + GLSLC(1, ivec2 size = imageSize(output_img[0]); ); + GLSLC(1, vec2 npos = vec2(pos)/vec2(size); ); + GLSLC(1, vec4 res = texture(input_img[0], npos); ); + GLSLC(1, imageStore(output_img[0], pos, res); ); + for (int i = 1; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + GLSLF(2, distort_chroma(%i, size, pos); ,i); + GLSLC(1, } else { ); + GLSLC(2, npos = vec2(pos)/vec2(size); ); + GLSLF(2, res = 
texture(input_img[%i], npos); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); + GLSLC(1, } ); + } + } + GLSLC(0, } ); + + RET(ff_vk_compile_shader(ctx, shd, "main")); + } + + RET(ff_vk_init_pipeline_layout(ctx, s->pl)); + RET(ff_vk_init_compute_pipeline(ctx, s->pl)); + + /* Execution context */ + RET(ff_vk_create_exec_ctx(ctx, &s->exec, + s->vkctx.hwctx->queue_family_comp_index)); + + s->initialized = 1; + + return 0; + +fail: + return err; +} + +static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) +{ + int err = 0; + ChromaticAberrationVulkanContext *s = avctx->priv; + AVVkFrame *in = (AVVkFrame *)in_f->data[0]; + AVVkFrame *out = (AVVkFrame *)out_f->data[0]; + int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + for (int i = 0; i < planes; i++) { + RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i], + av_vkfmt_from_pixfmt(s->vkctx.input_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i], + av_vkfmt_from_pixfmt(s->vkctx.output_format)[i], + ff_comp_identity_map)); + + s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + ff_vk_update_descriptor_set(avctx, s->pl, 0); + + ff_vk_start_exec_recording(avctx, s->exec); + + for (int i = 0; i < planes; i++) { + VkImageMemoryBarrier bar[2] = { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = in->layout[i], + .newLayout = s->input_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = in->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = 
VK_ACCESS_SHADER_WRITE_BIT, + .oldLayout = out->layout[i], + .newLayout = s->output_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = out->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + }; + + vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, + 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); + + in->layout[i] = bar[0].newLayout; + in->access[i] = bar[0].dstAccessMask; + + out->layout[i] = bar[1].newLayout; + out->access[i] = bar[1].dstAccessMask; + } + + ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl); + + ff_vk_update_push_exec(avctx, s->exec, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(s->opts), &s->opts); + + vkCmdDispatch(s->exec->buf, + FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], + FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); + + ff_vk_add_exec_dep(avctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + ff_vk_add_exec_dep(avctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + + err = ff_vk_submit_exec_queue(avctx, s->exec); + if (err) + return err; + + for (int i = 0; i < planes; i++) { + ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView); + ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView); + } + +fail: + return err; +} + +static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFilterContext *ctx = link->dst; + ChromaticAberrationVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in)); + + RET(process_frames(ctx, out, in)); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + + return 
ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static void chromaber_vulkan_uninit(AVFilterContext *avctx) +{ + ChromaticAberrationVulkanContext *s = avctx->priv; + + ff_vk_filter_uninit(avctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(ChromaticAberrationVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption chromaber_vulkan_options[] = { + { "dist_x", "Set horizontal distortion amount", OFFSET(opts.dist[0]), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, -10.0f, 10.0f, .flags = FLAGS }, + { "dist_y", "Set vertical distortion amount", OFFSET(opts.dist[1]), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, -10.0f, 10.0f, .flags = FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(chromaber_vulkan); + +static const AVFilterPad chromaber_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &chromaber_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, + { NULL } +}; + +static const AVFilterPad chromaber_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + }, + { NULL } +}; + +AVFilter ff_vf_chromaber_vulkan = { + .name = "chromaber_vulkan", + .description = NULL_IF_CONFIG_SMALL("Offset chroma of input video (chromatic aberration)"), + .priv_size = sizeof(ChromaticAberrationVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &chromaber_vulkan_uninit, + .query_formats = &ff_vk_filter_query_formats, + .inputs = chromaber_vulkan_inputs, + .outputs = chromaber_vulkan_outputs, + .priv_class = &chromaber_vulkan_class, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; From ecd6ea0f29d017a4093e200c96675057ad627723 Mon Sep 17 00:00:00 2001 From: Philip Langdale Date: Tue, 31 Dec 2019 09:41:57 -0800 Subject: [PATCH 9/9] avutil/hwcontext_cuda: refactor context initialisation There's enough going on here now that it should not be duplicated 
between cuda_device_create and cuda_device_derive. --- libavutil/hwcontext_cuda.c | 114 ++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 64 deletions(-) diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index 18abb87bbdc6d..53142edd0a169 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -336,57 +336,44 @@ static int cuda_device_init(AVHWDeviceContext *ctx) return ret; } -static int cuda_device_create(AVHWDeviceContext *device_ctx, - const char *device, - AVDictionary *opts, int flags) -{ +static int cuda_context_init(AVHWDeviceContext *device_ctx, int flags) { AVCUDADeviceContext *hwctx = device_ctx->hwctx; CudaFunctions *cu; CUcontext dummy; - int ret, dev_active = 0, device_idx = 0; + int ret, dev_active = 0; unsigned int dev_flags = 0; const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC; - if (device) - device_idx = strtol(device, NULL, 0); - - if (cuda_device_init(device_ctx) < 0) - goto error; - cu = hwctx->internal->cuda_dl; - ret = CHECK_CU(cu->cuInit(0)); - if (ret < 0) - goto error; - - ret = CHECK_CU(cu->cuDeviceGet(&hwctx->internal->cuda_device, device_idx)); - if (ret < 0) - goto error; - hwctx->internal->flags = flags; if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) { - ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, &dev_flags, &dev_active)); + ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, + &dev_flags, &dev_active)); if (ret < 0) - goto error; + return ret; if (dev_active && dev_flags != desired_flags) { av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n"); - goto error; + return AVERROR(ENOTSUP); } else if (dev_flags != desired_flags) { - ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, desired_flags)); + ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, + desired_flags)); if (ret < 0) - goto error; + return ret; } - ret = 
CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device)); + ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, + hwctx->internal->cuda_device)); if (ret < 0) - goto error; + return ret; } else { - ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, hwctx->internal->cuda_device)); + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, + hwctx->internal->cuda_device)); if (ret < 0) - goto error; + return ret; CHECK_CU(cu->cuCtxPopCurrent(&dummy)); } @@ -397,6 +384,37 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx, hwctx->stream = NULL; return 0; +} + +static int cuda_device_create(AVHWDeviceContext *device_ctx, + const char *device, + AVDictionary *opts, int flags) +{ + AVCUDADeviceContext *hwctx = device_ctx->hwctx; + CudaFunctions *cu; + int ret, device_idx = 0; + + if (device) + device_idx = strtol(device, NULL, 0); + + if (cuda_device_init(device_ctx) < 0) + goto error; + + cu = hwctx->internal->cuda_dl; + + ret = CHECK_CU(cu->cuInit(0)); + if (ret < 0) + goto error; + + ret = CHECK_CU(cu->cuDeviceGet(&hwctx->internal->cuda_device, device_idx)); + if (ret < 0) + goto error; + + ret = cuda_context_init(device_ctx, flags); + if (ret < 0) + goto error; + + return 0; error: cuda_device_uninit(device_ctx); @@ -409,11 +427,7 @@ static int cuda_device_derive(AVHWDeviceContext *device_ctx, AVCUDADeviceContext *hwctx = device_ctx->hwctx; CudaFunctions *cu; const char *src_uuid = NULL; - CUcontext dummy; - int ret, i, device_count, dev_active = 0; - unsigned int dev_flags = 0; - - const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC; + int ret, i, device_count; switch (src_ctx->type) { #if CONFIG_VULKAN @@ -470,37 +484,9 @@ static int cuda_device_derive(AVHWDeviceContext *device_ctx, goto error; } - hwctx->internal->flags = flags; - - if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) { - ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, &dev_flags, &dev_active)); - if 
(ret < 0) - goto error; - - if (dev_active && dev_flags != desired_flags) { - av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n"); - goto error; - } else if (dev_flags != desired_flags) { - ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, desired_flags)); - if (ret < 0) - goto error; - } - - ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device)); - if (ret < 0) - goto error; - } else { - ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, hwctx->internal->cuda_device)); - if (ret < 0) - goto error; - - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - } - - hwctx->internal->is_allocated = 1; - - // Setting stream to NULL will make functions automatically use the default CUstream - hwctx->stream = NULL; + ret = cuda_context_init(device_ctx, flags); + if (ret < 0) + goto error; return 0;