diff --git a/configure b/configure index eec43c3b06991..944d832650d8e 100755 --- a/configure +++ b/configure @@ -236,6 +236,7 @@ External library support: --enable-libfontconfig enable libfontconfig, useful for drawtext filter [no] --enable-libfreetype enable libfreetype, needed for drawtext filter [no] --enable-libfribidi enable libfribidi, improves drawtext filter [no] + --enable-libglslang enable GLSL->SPIRV compilation via libglslang [no] --enable-libgme enable Game Music Emu via libgme [no] --enable-libgsm enable GSM de/encoding via libgsm [no] --enable-libiec61883 enable iec61883 via libiec61883 [no] @@ -309,6 +310,7 @@ External library support: --enable-openssl enable openssl, needed for https support if gnutls, libtls or mbedtls is not used [no] --enable-pocketsphinx enable PocketSphinx, needed for asr filter [no] + --enable-vulkan enable Vulkan code [no] --disable-sndio disable sndio support [autodetect] --disable-schannel disable SChannel SSP, needed for TLS support on Windows if openssl and gnutls are not used [autodetect] @@ -1549,11 +1551,11 @@ require_cc(){ } require_cpp(){ - name="$1" - headers="$2" - classes="$3" - shift 3 - check_lib_cpp "$headers" "$classes" "$@" || die "ERROR: $name not found" + log require_cpp "$@" + name_version="$1" + name="${1%% *}" + shift + check_lib_cpp "$name" "$@" || die "ERROR: $name_version not found" } require_headers(){ @@ -1770,6 +1772,7 @@ EXTERNAL_LIBRARY_LIST=" libfontconfig libfreetype libfribidi + libglslang libgme libgsm libiec61883 @@ -1854,6 +1857,7 @@ HWACCEL_LIBRARY_LIST=" mmal omx opencl + vulkan " DOCUMENT_LIST=" @@ -3456,6 +3460,7 @@ ass_filter_deps="libass" atempo_filter_deps="avcodec" atempo_filter_select="rdft" avgblur_opencl_filter_deps="opencl" +avgblur_vulkan_filter_deps="vulkan libglslang" azmq_filter_deps="libzmq" blackframe_filter_deps="gpl" bm3d_filter_deps="avcodec" @@ -3463,6 +3468,7 @@ bm3d_filter_select="dct" boxblur_filter_deps="gpl" boxblur_opencl_filter_deps="opencl gpl" bs2b_filter_deps="libbs2b" +chromaber_vulkan_filter_deps="vulkan libglslang" colorkey_opencl_filter_deps="opencl" colormatrix_filter_deps="gpl" convolution_opencl_filter_deps="opencl" @@ -3527,6 +3533,7 @@ openclsrc_filter_deps="opencl" overlay_opencl_filter_deps="opencl" overlay_qsv_filter_deps="libmfx" overlay_qsv_filter_select="qsvvpp" +overlay_vulkan_filter_deps="vulkan libglslang" owdenoise_filter_deps="gpl" pan_filter_deps="swresample" perspective_filter_deps="gpl" @@ -3594,6 +3601,7 @@ zmq_filter_deps="libzmq" zoompan_filter_deps="swscale" zscale_filter_deps="libzimg const_nan" scale_vaapi_filter_deps="vaapi" +scale_vulkan_filter_deps="vulkan libglslang" vpp_qsv_filter_deps="libmfx" vpp_qsv_filter_select="qsvvpp" yadif_cuda_filter_deps="ffnvcodec" @@ -3640,7 +3648,7 @@ avformat_deps="avcodec avutil" avformat_suggest="libm network zlib" avresample_deps="avutil" avresample_suggest="libm" -avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt" +avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi vulkan videotoolbox corefoundation corevideo coremedia bcrypt" postproc_deps="avutil gpl" postproc_suggest="libm" swresample_deps="avutil" @@ -6257,6 +6265,7 @@ enabled fontconfig && enable libfontconfig enabled libfontconfig && require_pkg_config libfontconfig fontconfig "fontconfig/fontconfig.h" FcInit enabled libfreetype && require_pkg_config libfreetype freetype2 "ft2build.h FT_FREETYPE_H" FT_Init_FreeType enabled libfribidi && require_pkg_config libfribidi fribidi fribidi.h fribidi_version_info +enabled libglslang && require_cpp libglslang SPIRV/GlslangToSpv.h "glslang::TIntermediate*" -lglslang -lHLSL -lOGLCompiler -lOSDependent -lSPIRV -lSPVRemapper -lSPIRV -lstdc++ enabled libgme && { check_pkg_config libgme libgme gme/gme.h gme_new_emu || require libgme gme/gme.h gme_new_emu -lgme -lstdc++; } enabled libgsm && { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do @@ -6627,6 +6636,9 @@ enabled vdpau && enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.h" DtsCrystalHDVersion -lcrystalhd +enabled vulkan && + require_pkg_config vulkan "vulkan >= 1.1.97" "vulkan/vulkan.h" vkCreateInstance + if enabled x86; then case $target_os in mingw32*|mingw64*|win32|win64|linux|cygwin*) diff --git a/doc/APIchanges b/doc/APIchanges index 5b8d801f06acf..394d5eb40a9a2 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,10 @@ libavutil: 2017-10-21 API changes, most recent first: +2020-ww-xx - xxxxxxxxxx - lavu yy.yy.yyy - hwcontext.h + Add AV_PIX_FMT_VULKAN + Add AV_HWDEVICE_TYPE_VULKAN and implementation. + 2019-12-xx - xxxxxxxxxx - lavu 56.37.100 - buffer.h Add av_buffer_pool_buffer_get_opaque(). diff --git a/doc/filters.texi b/doc/filters.texi index 57330d1fd909d..5037be29eb173 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -11893,7 +11893,18 @@ Upload system memory frames to hardware surfaces. The device to upload to must be supplied when the filter is initialised. If using ffmpeg, select the appropriate device with the @option{-filter_hw_device} -option. +option or with the @option{derive_device} option. The input and output devices +must be of different types and compatible - the exact meaning of this is +system-dependent, but typically it means that they must refer to the same +underlying hardware context (for example, refer to the same graphics card). + +The following additional parameters are accepted: + +@table @option +@item derive_device @var{type} +Rather than using the device supplied at initialisation, instead derive a new +device of type @var{type} from the device the input frames exist on. +@end table @anchor{hwupload_cuda} @section hwupload_cuda diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 37d4eee85848d..d2e2d496627af 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -163,6 +163,7 @@ OBJS-$(CONFIG_ATADENOISE_FILTER) += vf_atadenoise.o OBJS-$(CONFIG_AVGBLUR_FILTER) += vf_avgblur.o OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ opencl/avgblur.o boxblur.o +OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vf_avgblur_vulkan.o vulkan.o OBJS-$(CONFIG_BBOX_FILTER) += bbox.o vf_bbox.o OBJS-$(CONFIG_BENCH_FILTER) += f_bench.o OBJS-$(CONFIG_BILATERAL_FILTER) += vf_bilateral.o @@ -175,6 +176,7 @@ OBJS-$(CONFIG_BOXBLUR_FILTER) += vf_boxblur.o boxblur.o OBJS-$(CONFIG_BOXBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ opencl/avgblur.o boxblur.o OBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o yadif_common.o +OBJS-$(CONFIG_CHROMABER_VULKAN_FILTER) += vf_chromaber_vulkan.o vulkan.o OBJS-$(CONFIG_CHROMAHOLD_FILTER) += vf_chromakey.o OBJS-$(CONFIG_CHROMAKEY_FILTER) += vf_chromakey.o OBJS-$(CONFIG_CHROMASHIFT_FILTER) += vf_chromashift.o @@ -322,6 +324,7 @@ OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o framesync.o OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \ opencl/overlay.o framesync.o OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o +OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o @@ -364,6 +367,7 @@ OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.pt OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o +OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o vulkan.o OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale_eval.o OBJS-$(CONFIG_SCROLL_FILTER) += vf_scroll.o OBJS-$(CONFIG_SELECT_FILTER) += f_select.o @@ -506,6 +510,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h +OBJS-$(CONFIG_LIBGLSLANG) += glslang.o + TOOLS = graph2dot TESTPROGS = drawutils filtfmts formats integral diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index c295f8e403575..b806e275ed478 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -88,6 +88,7 @@ extern AVFilter ff_af_bandreject; extern AVFilter ff_af_bass; extern AVFilter ff_af_biquad; extern AVFilter ff_af_bs2b; +extern AVFilter ff_vf_chromaber_vulkan; extern AVFilter ff_af_channelmap; extern AVFilter ff_af_channelsplit; extern AVFilter ff_af_chorus; @@ -154,6 +155,7 @@ extern AVFilter ff_vf_ass; extern AVFilter ff_vf_atadenoise; extern AVFilter ff_vf_avgblur; extern AVFilter ff_vf_avgblur_opencl; +extern AVFilter ff_vf_avgblur_vulkan; extern AVFilter ff_vf_bbox; extern AVFilter ff_vf_bench; extern AVFilter ff_vf_bilateral; @@ -306,6 +308,7 @@ extern AVFilter ff_vf_oscilloscope; extern AVFilter ff_vf_overlay; extern AVFilter ff_vf_overlay_opencl; extern AVFilter ff_vf_overlay_qsv; +extern AVFilter ff_vf_overlay_vulkan; extern AVFilter ff_vf_owdenoise; extern AVFilter ff_vf_pad; extern AVFilter ff_vf_palettegen; @@ -346,6 +349,7 @@ extern AVFilter ff_vf_scale_cuda; extern AVFilter ff_vf_scale_npp; extern AVFilter ff_vf_scale_qsv; extern AVFilter ff_vf_scale_vaapi; +extern AVFilter ff_vf_scale_vulkan; extern AVFilter ff_vf_scale2ref; extern AVFilter ff_vf_scroll; extern AVFilter ff_vf_select; diff --git a/libavfilter/glslang.cpp b/libavfilter/glslang.cpp new file mode 100644 index 0000000000000..786136186d1b1 --- /dev/null +++ b/libavfilter/glslang.cpp @@ -0,0 +1,215 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include + +extern "C" { +#include "libavutil/mem.h" +} + +#include +#include +#include +#include + +#include "glslang.h" + +using namespace glslang; + +static pthread_mutex_t glslang_mutex = PTHREAD_MUTEX_INITIALIZER; +static int glslang_refcount; + +int glslang_init(void) +{ + int ret = 1; + + pthread_mutex_lock(&glslang_mutex); + if (glslang_refcount++ == 0) + ret = InitializeProcess(); + pthread_mutex_unlock(&glslang_mutex); + + return ret; +} + +void glslang_uninit(void) +{ + pthread_mutex_lock(&glslang_mutex); + if (--glslang_refcount == 0) + FinalizeProcess(); + if (glslang_refcount < 0) + glslang_refcount = 0; + pthread_mutex_unlock(&glslang_mutex); +} + +#define GLSL_VERSION EShTargetVulkan_1_0 +#define SPIRV_VERSION EShTargetSpv_1_0 + +extern const TBuiltInResource DefaultTBuiltInResource; + +GLSlangResult *glslang_compile(const char *glsl, enum GLSlangStage stage) +{ + GLSlangResult *res = (GLSlangResult *)av_mallocz(sizeof(*res)); + + static const EShLanguage lang[] = { + [GLSLANG_VERTEX] = EShLangVertex, + [GLSLANG_FRAGMENT] = EShLangFragment, + [GLSLANG_COMPUTE] = EShLangCompute, + }; + + assert(glslang_refcount); + TShader *shader = new TShader(lang[stage]); + shader->setEnvClient(EShClientVulkan, GLSL_VERSION); + shader->setEnvTarget(EShTargetSpv, SPIRV_VERSION); + shader->setStrings(&glsl, 1); + if (!shader->parse(&DefaultTBuiltInResource, GLSL_VERSION, true, EShMsgDefault)) { + res->error_msg = av_strdup(shader->getInfoLog()); + delete shader; + return res; + } + + TProgram *prog = new TProgram(); + prog->addShader(shader); + if (!prog->link(EShMsgDefault)) { + res->error_msg = av_strdup(prog->getInfoLog()); + delete shader; + delete prog; + return res; + } + + std::vector spirv; + GlslangToSpv(*prog->getIntermediate(lang[stage]), spirv); + + res->success = true; + res->size = spirv.size() * sizeof(unsigned int); + res->data = av_memdup(spirv.data(), res->size), + delete shader; + delete prog; + return res; +} + +// Taken from glslang's examples, which apparently generally bases the choices +// on OpenGL specification limits +const TBuiltInResource DefaultTBuiltInResource = { + /* .MaxLights = */ 32, + /* .MaxClipPlanes = */ 6, + /* .MaxTextureUnits = */ 32, + /* .MaxTextureCoords = */ 32, + /* .MaxVertexAttribs = */ 64, + /* .MaxVertexUniformComponents = */ 4096, + /* .MaxVaryingFloats = */ 64, + /* .MaxVertexTextureImageUnits = */ 32, + /* .MaxCombinedTextureImageUnits = */ 80, + /* .MaxTextureImageUnits = */ 32, + /* .MaxFragmentUniformComponents = */ 4096, + /* .MaxDrawBuffers = */ 32, + /* .MaxVertexUniformVectors = */ 128, + /* .MaxVaryingVectors = */ 8, + /* .MaxFragmentUniformVectors = */ 16, + /* .MaxVertexOutputVectors = */ 16, + /* .MaxFragmentInputVectors = */ 15, + /* .MinProgramTexelOffset = */ -8, + /* .MaxProgramTexelOffset = */ 7, + /* .MaxClipDistances = */ 8, + /* .MaxComputeWorkGroupCountX = */ 65535, + /* .MaxComputeWorkGroupCountY = */ 65535, + /* .MaxComputeWorkGroupCountZ = */ 65535, + /* .MaxComputeWorkGroupSizeX = */ 1024, + /* .MaxComputeWorkGroupSizeY = */ 1024, + /* .MaxComputeWorkGroupSizeZ = */ 64, + /* .MaxComputeUniformComponents = */ 1024, + /* .MaxComputeTextureImageUnits = */ 16, + /* .MaxComputeImageUniforms = */ 8, + /* .MaxComputeAtomicCounters = */ 8, + /* .MaxComputeAtomicCounterBuffers = */ 1, + /* .MaxVaryingComponents = */ 60, + /* .MaxVertexOutputComponents = */ 64, + /* .MaxGeometryInputComponents = */ 64, + /* .MaxGeometryOutputComponents = */ 128, + /* .MaxFragmentInputComponents = */ 128, + /* .MaxImageUnits = */ 8, + /* .MaxCombinedImageUnitsAndFragmentOutputs = */ 8, + /* .MaxCombinedShaderOutputResources = */ 8, + /* .MaxImageSamples = */ 0, + /* .MaxVertexImageUniforms = */ 0, + /* .MaxTessControlImageUniforms = */ 0, + /* .MaxTessEvaluationImageUniforms = */ 0, + /* .MaxGeometryImageUniforms = */ 0, + /* .MaxFragmentImageUniforms = */ 8, + /* .MaxCombinedImageUniforms = */ 8, + /* .MaxGeometryTextureImageUnits = */ 16, + /* .MaxGeometryOutputVertices = */ 256, + /* .MaxGeometryTotalOutputComponents = */ 1024, + /* .MaxGeometryUniformComponents = */ 1024, + /* .MaxGeometryVaryingComponents = */ 64, + /* .MaxTessControlInputComponents = */ 128, + /* .MaxTessControlOutputComponents = */ 128, + /* .MaxTessControlTextureImageUnits = */ 16, + /* .MaxTessControlUniformComponents = */ 1024, + /* .MaxTessControlTotalOutputComponents = */ 4096, + /* .MaxTessEvaluationInputComponents = */ 128, + /* .MaxTessEvaluationOutputComponents = */ 128, + /* .MaxTessEvaluationTextureImageUnits = */ 16, + /* .MaxTessEvaluationUniformComponents = */ 1024, + /* .MaxTessPatchComponents = */ 120, + /* .MaxPatchVertices = */ 32, + /* .MaxTessGenLevel = */ 64, + /* .MaxViewports = */ 16, + /* .MaxVertexAtomicCounters = */ 0, + /* .MaxTessControlAtomicCounters = */ 0, + /* .MaxTessEvaluationAtomicCounters = */ 0, + /* .MaxGeometryAtomicCounters = */ 0, + /* .MaxFragmentAtomicCounters = */ 8, + /* .MaxCombinedAtomicCounters = */ 8, + /* .MaxAtomicCounterBindings = */ 1, + /* .MaxVertexAtomicCounterBuffers = */ 0, + /* .MaxTessControlAtomicCounterBuffers = */ 0, + /* .MaxTessEvaluationAtomicCounterBuffers = */ 0, + /* .MaxGeometryAtomicCounterBuffers = */ 0, + /* .MaxFragmentAtomicCounterBuffers = */ 1, + /* .MaxCombinedAtomicCounterBuffers = */ 1, + /* .MaxAtomicCounterBufferSize = */ 16384, + /* .MaxTransformFeedbackBuffers = */ 4, + /* .MaxTransformFeedbackInterleavedComponents = */ 64, + /* .MaxCullDistances = */ 8, + /* .MaxCombinedClipAndCullDistances = */ 8, + /* .MaxSamples = */ 4, +#if GLSLANG_PATCH_LEVEL >= 2892 + /* .maxMeshOutputVerticesNV = */ 256, + /* .maxMeshOutputPrimitivesNV = */ 512, + /* .maxMeshWorkGroupSizeX_NV = */ 32, + /* .maxMeshWorkGroupSizeY_NV = */ 1, + /* .maxMeshWorkGroupSizeZ_NV = */ 1, + /* .maxTaskWorkGroupSizeX_NV = */ 32, + /* .maxTaskWorkGroupSizeY_NV = */ 1, + /* .maxTaskWorkGroupSizeZ_NV = */ 1, + /* .maxMeshViewCountNV = */ 4, +#endif + + .limits = { + /* .nonInductiveForLoops = */ 1, + /* .whileLoops = */ 1, + /* .doWhileLoops = */ 1, + /* .generalUniformIndexing = */ 1, + /* .generalAttributeMatrixVectorIndexing = */ 1, + /* .generalVaryingIndexing = */ 1, + /* .generalSamplerIndexing = */ 1, + /* .generalVariableIndexing = */ 1, + /* .generalConstantMatrixVectorIndexing = */ 1, + } +}; diff --git a/libavfilter/glslang.h b/libavfilter/glslang.h new file mode 100644 index 0000000000000..865af71580d0a --- /dev/null +++ b/libavfilter/glslang.h @@ -0,0 +1,49 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int glslang_init(void); +void glslang_uninit(void); + +typedef struct GLSlangResult { + int success; + const char *error_msg; + + void *data; /* Shader data or NULL */ + size_t size; +} GLSlangResult; + +enum GLSlangStage { + GLSLANG_VERTEX, + GLSLANG_FRAGMENT, + GLSLANG_COMPUTE, +}; + +/* Compile GLSL into a SPIRV stream, if possible */ +GLSlangResult *glslang_compile(const char *glsl, enum GLSlangStage stage); + +#ifdef __cplusplus +} +#endif diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c new file mode 100644 index 0000000000000..7435b0434a726 --- /dev/null +++ b/libavfilter/vf_avgblur_vulkan.c @@ -0,0 +1,406 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "vulkan.h" +#include "internal.h" + +#define CGS 32 + +typedef struct AvgBlurVulkanContext { + VulkanFilterContext vkctx; + + int initialized; + FFVkExecContext *exec; + VulkanPipeline *pl_hor; + VulkanPipeline *pl_ver; + + /* Shader updators, must be in the main filter struct */ + VkDescriptorImageInfo input_images[3]; + VkDescriptorImageInfo tmp_images[3]; + VkDescriptorImageInfo output_images[3]; + + int size_x; + int size_y; + int planes; +} AvgBlurVulkanContext; + +static const char blur_kernel[] = { + C(0, shared vec4 cache[DIR(gl_WorkGroupSize) + FILTER_RADIUS*2]; ) + C(0, ) + C(0, void distort(const ivec2 pos, const int idx) ) + C(0, { ) + C(1, const uint cp = DIR(gl_LocalInvocationID) + FILTER_RADIUS; ) + C(0, ) + C(1, cache[cp] = texture(input_img[idx], pos); ) + C(0, ) + C(1, const ivec2 loc_l = pos - INC(FILTER_RADIUS); ) + C(1, cache[cp - FILTER_RADIUS] = texture(input_img[idx], loc_l); ) + C(0, ) + C(1, const ivec2 loc_h = pos + INC(DIR(gl_WorkGroupSize)); ) + C(1, cache[cp + DIR(gl_WorkGroupSize)] = texture(input_img[idx], loc_h); ) + C(0, ) + C(1, barrier(); ) + C(0, ) + C(1, vec4 sum = vec4(0); ) + C(1, for (int p = -FILTER_RADIUS; p <= FILTER_RADIUS; p++) ) + C(2, sum += cache[cp + p]; ) + C(0, ) + C(1, sum /= vec4(FILTER_RADIUS*2 + 1); ) + C(1, imageStore(output_img[idx], pos, sum); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) +{ + int err; + SPIRVShader *shd; + AvgBlurVulkanContext *s = ctx->priv; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR); + + VulkanDescriptorSetBinding desc_i[2] = { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER_ARRAY4(*sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + if (!sampler) + return AVERROR_EXTERNAL; + + { /* Create shader for the horizontal pass */ + desc_i[0].updater = s->input_images; + desc_i[1].updater = s->tmp_images; + + s->pl_hor = ff_vk_create_pipeline(ctx); + if (!s->pl_hor) + return AVERROR(ENOMEM); + + shd = ff_vk_init_shader(ctx, s->pl_hor, "avgblur_compute_hor", + VK_SHADER_STAGE_COMPUTE_BIT); + + ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ CGS, 1, 1 }); + + RET(ff_vk_add_descriptor_set(ctx, s->pl_hor, shd, desc_i, 2, 0)); + + GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_x); + GLSLC(0, #define INC(x) (ivec2(x, 0)) ); + GLSLC(0, #define DIR(var) (var.x) ); + GLSLD( blur_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + if (s->planes & (1 << i)) { + GLSLF(2, distort(pos, %i); ,i); + } else { + GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); + } + GLSLC(1, } ); + } + GLSLC(0, } ); + + RET(ff_vk_compile_shader(ctx, shd, "main")); + + RET(ff_vk_init_pipeline_layout(ctx, s->pl_hor)); + RET(ff_vk_init_compute_pipeline(ctx, s->pl_hor)); + } + + { /* Create shader for the vertical pass */ + desc_i[0].updater = s->tmp_images; + desc_i[1].updater = s->output_images; + + s->pl_ver = ff_vk_create_pipeline(ctx); + if (!s->pl_ver) + return AVERROR(ENOMEM); + + shd = ff_vk_init_shader(ctx, s->pl_ver, "avgblur_compute_ver", + VK_SHADER_STAGE_COMPUTE_BIT); + + ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 1, CGS, 1 }); + + RET(ff_vk_add_descriptor_set(ctx, s->pl_ver, shd, desc_i, 2, 0)); + + GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_y); + GLSLC(0, #define INC(x) (ivec2(0, x)) ); + GLSLC(0, #define DIR(var) (var.y) ); + GLSLD( blur_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + if (s->planes & (1 << i)) { + GLSLF(2, distort(pos, %i); ,i); + } else { + GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); + } + GLSLC(1, } ); + } + GLSLC(0, } ); + + RET(ff_vk_compile_shader(ctx, shd, "main")); + + RET(ff_vk_init_pipeline_layout(ctx, s->pl_ver)); + RET(ff_vk_init_compute_pipeline(ctx, s->pl_ver)); + } + + /* Execution context */ + RET(ff_vk_create_exec_ctx(ctx, &s->exec, + s->vkctx.hwctx->queue_family_comp_index)); + + s->initialized = 1; + + return 0; + +fail: + return err; +} + +static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f, AVFrame *in_f) +{ + int err; + AvgBlurVulkanContext *s = avctx->priv; + AVVkFrame *in = (AVVkFrame *)in_f->data[0]; + AVVkFrame *tmp = (AVVkFrame *)tmp_f->data[0]; + AVVkFrame *out = (AVVkFrame *)out_f->data[0]; + int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + for (int i = 0; i < planes; i++) { + RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i], + av_vkfmt_from_pixfmt(s->vkctx.input_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->tmp_images[i].imageView, tmp->img[i], + av_vkfmt_from_pixfmt(s->vkctx.output_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i], + av_vkfmt_from_pixfmt(s->vkctx.output_format)[i], + ff_comp_identity_map)); + + s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + ff_vk_update_descriptor_set(avctx, s->pl_hor, 0); + ff_vk_update_descriptor_set(avctx, s->pl_ver, 0); + + ff_vk_start_exec_recording(avctx, s->exec); + + for (int i = 0; i < planes; i++) { + VkImageMemoryBarrier bar[] = { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = in->layout[i], + .newLayout = s->input_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = in->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, + .oldLayout = tmp->layout[i], + .newLayout = s->tmp_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = tmp->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .oldLayout = out->layout[i], + .newLayout = s->output_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = out->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + }; + + vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, + 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); + + in->layout[i] = bar[0].newLayout; + in->access[i] = bar[0].dstAccessMask; + + tmp->layout[i] = bar[1].newLayout; + tmp->access[i] = bar[1].dstAccessMask; + + out->layout[i] = bar[2].newLayout; + out->access[i] = bar[2].dstAccessMask; + } + + ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl_hor); + + vkCmdDispatch(s->exec->buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, + s->vkctx.output_height, 1); + + ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl_ver); + + vkCmdDispatch(s->exec->buf, s->vkctx.output_width, + FFALIGN(s->vkctx.output_height, CGS)/CGS, 1); + + ff_vk_add_exec_dep(avctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + ff_vk_add_exec_dep(avctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + + err = ff_vk_submit_exec_queue(avctx, s->exec); + if (err) + return err; + +fail: + + for (int i = 0; i < planes; i++) { + ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView); + ff_vk_destroy_imageview(avctx, &s->tmp_images[i].imageView); + ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView); + } + + return err; +} + +static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFrame *tmp = NULL, *out = NULL; + AVFilterContext *ctx = link->dst; + AvgBlurVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in)); + + RET(process_frames(ctx, out, tmp, in)); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + av_frame_free(&tmp); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&tmp); + av_frame_free(&out); + return err; +} + +static void avgblur_vulkan_uninit(AVFilterContext *avctx) +{ + AvgBlurVulkanContext *s = avctx->priv; + + ff_vk_filter_uninit(avctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(AvgBlurVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption avgblur_vulkan_options[] = { + { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, {.i64 = 2}, 0, 32, .flags = FLAGS }, + { "planes", "Set planes to filter (bitmask)", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, .flags = FLAGS }, + { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, {.i64 = 2}, 0, 32, .flags = FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(avgblur_vulkan); + +static const AVFilterPad avgblur_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &avgblur_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, + { NULL } +}; + +static const AVFilterPad avgblur_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + }, + { NULL } +}; + +AVFilter ff_vf_avgblur_vulkan = { + .name = "avgblur_vulkan", + .description = NULL_IF_CONFIG_SMALL("Apply avgblur mask to input video"), + .priv_size = sizeof(AvgBlurVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &avgblur_vulkan_uninit, + .query_formats = &ff_vk_filter_query_formats, + .inputs = avgblur_vulkan_inputs, + .outputs = avgblur_vulkan_outputs, + .priv_class = &avgblur_vulkan_class, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c new file mode 100644 index 0000000000000..673b3a7a68193 --- /dev/null +++ b/libavfilter/vf_chromaber_vulkan.c @@ -0,0 +1,340 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "vulkan.h" +#include "internal.h" + +#define CGROUPS (int [3]){ 32, 32, 1 } + +typedef struct ChromaticAberrationVulkanContext { + VulkanFilterContext vkctx; + + int initialized; + FFVkExecContext *exec; + VulkanPipeline *pl; + + /* Shader updators, must be in the main filter struct */ + VkDescriptorImageInfo input_images[3]; + VkDescriptorImageInfo output_images[3]; + + /* Push constants / options */ + struct { + float dist[2]; + } opts; +} ChromaticAberrationVulkanContext; + +static const char distort_chroma_kernel[] = { + C(0, void distort_rgb(ivec2 size, ivec2 pos) ) + C(0, { ) + C(1, const vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; ) + C(1, const vec2 o = p * (dist - 1.0f); ) + C(0, ) + C(1, vec4 res; ) + C(1, res.r = texture(input_img[0], ((p - o)/2.0f) + 0.5f).r; ) + C(1, res.g = texture(input_img[0], ((p )/2.0f) + 0.5f).g; ) + C(1, res.b = texture(input_img[0], ((p + o)/2.0f) + 0.5f).b; ) + C(1, res.a = texture(input_img[0], ((p )/2.0f) + 0.5f).a; ) + C(1, imageStore(output_img[0], pos, res); ) + C(0, } ) + C(0, ) + C(0, void distort_chroma(int idx, ivec2 size, ivec2 pos) ) + C(0, { ) + C(1, vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; ) + C(1, float d = sqrt(p.x*p.x + p.y*p.y); ) + C(1, p *= d / (d* dist); ) + C(1, vec4 res = texture(input_img[idx], (p/2.0f) + 0.5f); ) + C(1, imageStore(output_img[idx], pos, res); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) +{ + int err; + ChromaticAberrationVulkanContext *s = ctx->priv; + + /* Create a sampler */ + VkSampler *sampler = ff_vk_init_sampler(ctx, 0, VK_FILTER_LINEAR); + if (!sampler) + return AVERROR_EXTERNAL; + + s->pl = ff_vk_create_pipeline(ctx); + if (!s->pl) + return AVERROR(ENOMEM); + + /* Normalize options */ + s->opts.dist[0] = (s->opts.dist[0] / 100.0f) + 1.0f; + s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f; + + { /* Create the shader */ + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + VulkanDescriptorSetBinding desc_i[2] = { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->input_images, + .samplers = DUP_SAMPLER_ARRAY4(*sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->output_images, + }, + }; + + SPIRVShader *shd = ff_vk_init_shader(ctx, s->pl, "chromaber_compute", + VK_SHADER_STAGE_COMPUTE_BIT); + if (!shd) + return AVERROR(ENOMEM); + + ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, vec2 dist; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(ctx, s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, desc_i, 2, 0)); /* set 0 */ + + GLSLD( distort_chroma_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + if (planes == 1) { + GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); ); + } else { + GLSLC(1, ivec2 size = imageSize(output_img[0]); ); + GLSLC(1, vec2 npos = vec2(pos)/vec2(size); ); + GLSLC(1, vec4 res = texture(input_img[0], npos); ); + GLSLC(1, imageStore(output_img[0], pos, res); ); + for (int i = 1; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + GLSLF(2, distort_chroma(%i, size, pos); ,i); + GLSLC(1, } else { ); + GLSLC(2, npos = vec2(pos)/vec2(size); ); + GLSLF(2, res = texture(input_img[%i], npos); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); + GLSLC(1, } ); + } + } + GLSLC(0, } ); + + RET(ff_vk_compile_shader(ctx, shd, "main")); + } + + RET(ff_vk_init_pipeline_layout(ctx, s->pl)); + RET(ff_vk_init_compute_pipeline(ctx, s->pl)); + + /* Execution context */ + RET(ff_vk_create_exec_ctx(ctx, &s->exec, + s->vkctx.hwctx->queue_family_comp_index)); + + s->initialized = 1; + + return 0; + +fail: + return err; +} + +static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) +{ + int err = 0; + ChromaticAberrationVulkanContext *s = avctx->priv; + AVVkFrame *in = (AVVkFrame *)in_f->data[0]; + AVVkFrame *out = (AVVkFrame *)out_f->data[0]; + int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + for (int i = 0; i < planes; i++) { + RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i], + av_vkfmt_from_pixfmt(s->vkctx.input_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i], + av_vkfmt_from_pixfmt(s->vkctx.output_format)[i], + ff_comp_identity_map)); + + s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + ff_vk_update_descriptor_set(avctx, s->pl, 0); + + ff_vk_start_exec_recording(avctx, s->exec); + + for (int i = 0; i < planes; i++) { + VkImageMemoryBarrier bar[2] = { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = in->layout[i], + .newLayout = s->input_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = in->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .oldLayout = out->layout[i], + .newLayout = s->output_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = out->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + }; + + vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, + 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); + + in->layout[i] = bar[0].newLayout; + in->access[i] = bar[0].dstAccessMask; + + out->layout[i] = bar[1].newLayout; + out->access[i] = bar[1].dstAccessMask; + } + + ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl); + + ff_vk_update_push_exec(avctx, s->exec, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(s->opts), &s->opts); + + vkCmdDispatch(s->exec->buf, + FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], + FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); + + ff_vk_add_exec_dep(avctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + ff_vk_add_exec_dep(avctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + + err = ff_vk_submit_exec_queue(avctx, s->exec); + if (err) + return err; + + for (int i = 0; i < planes; i++) { + ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView); + ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView); + } + +fail: + return err; +} + +static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFilterContext *ctx = link->dst; + ChromaticAberrationVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in)); + + RET(process_frames(ctx, out, in)); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static void chromaber_vulkan_uninit(AVFilterContext *avctx) +{ + ChromaticAberrationVulkanContext *s = avctx->priv; + + ff_vk_filter_uninit(avctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(ChromaticAberrationVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption chromaber_vulkan_options[] = { + { "dist_x", "Set horizontal distortion amount", OFFSET(opts.dist[0]), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, -10.0f, 10.0f, .flags = FLAGS }, + { "dist_y", "Set vertical distortion amount", OFFSET(opts.dist[1]), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, -10.0f, 10.0f, .flags = FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(chromaber_vulkan); + +static const AVFilterPad chromaber_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &chromaber_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, + { NULL } +}; + +static const AVFilterPad chromaber_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + }, + { NULL } +}; + +AVFilter ff_vf_chromaber_vulkan = { + .name = "chromaber_vulkan", + .description = NULL_IF_CONFIG_SMALL("Offset chroma of input video (chromatic aberration)"), + .priv_size = sizeof(ChromaticAberrationVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &chromaber_vulkan_uninit, + .query_formats = &ff_vk_filter_query_formats, + .inputs = chromaber_vulkan_inputs, + .outputs = chromaber_vulkan_outputs, + .priv_class = &chromaber_vulkan_class, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_hwupload.c b/libavfilter/vf_hwupload.c index 50bc7e10f6eb9..7c5dd497b0ea2 100644 --- a/libavfilter/vf_hwupload.c +++ b/libavfilter/vf_hwupload.c @@ -32,10 +32,11 @@ typedef struct HWUploadContext { const AVClass *class; AVBufferRef *hwdevice_ref; - AVHWDeviceContext *hwdevice; AVBufferRef *hwframes_ref; AVHWFramesContext *hwframes; + + char *device_type; } HWUploadContext; static int hwupload_query_formats(AVFilterContext *avctx) @@ -46,17 +47,27 @@ static int hwupload_query_formats(AVFilterContext *avctx) AVFilterFormats *input_formats = NULL; int err, i; - if (!avctx->hw_device_ctx) { + if (ctx->hwdevice_ref) { + /* We already have a specified device. */ + } else if (avctx->hw_device_ctx) { + if (ctx->device_type) { + err = av_hwdevice_ctx_create_derived( + &ctx->hwdevice_ref, + av_hwdevice_find_type_by_name(ctx->device_type), + avctx->hw_device_ctx, 0); + if (err < 0) + return err; + } else { + ctx->hwdevice_ref = av_buffer_ref(avctx->hw_device_ctx); + if (!ctx->hwdevice_ref) + return AVERROR(ENOMEM); + } + } else { av_log(ctx, AV_LOG_ERROR, "A hardware device reference is required " "to upload frames to.\n"); return AVERROR(EINVAL); } - ctx->hwdevice_ref = av_buffer_ref(avctx->hw_device_ctx); - if (!ctx->hwdevice_ref) - return AVERROR(ENOMEM); - ctx->hwdevice = (AVHWDeviceContext*)ctx->hwdevice_ref->data; - constraints = av_hwdevice_get_hwframe_constraints(ctx->hwdevice_ref, NULL); if (!constraints) { err = AVERROR(EINVAL); @@ -127,7 +138,13 @@ static int hwupload_config_output(AVFilterLink *outlink) av_get_pix_fmt_name(inlink->format)); ctx->hwframes->format = outlink->format; - ctx->hwframes->sw_format = inlink->format; + if (inlink->hw_frames_ctx) { + AVHWFramesContext *in_hwframe_ctx = + (AVHWFramesContext*)inlink->hw_frames_ctx->data; + ctx->hwframes->sw_format = in_hwframe_ctx->sw_format; + } else { + ctx->hwframes->sw_format = inlink->format; + } ctx->hwframes->width = inlink->w; ctx->hwframes->height = inlink->h; @@ -200,13 +217,21 @@ static av_cold void hwupload_uninit(AVFilterContext *avctx) av_buffer_unref(&ctx->hwdevice_ref); } -static const AVClass hwupload_class = { - .class_name = "hwupload", - .item_name = av_default_item_name, - .option = NULL, - .version = LIBAVUTIL_VERSION_INT, +#define OFFSET(x) offsetof(HWUploadContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption hwupload_options[] = { + { + "derive_device", "Derive a new device of this type", + OFFSET(device_type), AV_OPT_TYPE_STRING, + { .str = NULL }, 0, 0, FLAGS + }, + { + NULL + } }; +AVFILTER_DEFINE_CLASS(hwupload); + static const AVFilterPad hwupload_inputs[] = { { .name = "default", diff --git a/libavfilter/vf_hwupload_cuda.c b/libavfilter/vf_hwupload_cuda.c index 4d83e6c8f27ed..8ee0825859c08 100644 --- a/libavfilter/vf_hwupload_cuda.c +++ b/libavfilter/vf_hwupload_cuda.c @@ -60,6 +60,9 @@ static int cudaupload_query_formats(AVFilterContext *ctx) AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_P010, AV_PIX_FMT_P016, AV_PIX_FMT_YUV444P16, AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, +#if CONFIG_VULKAN + AV_PIX_FMT_VULKAN, +#endif AV_PIX_FMT_NONE, }; static const enum AVPixelFormat output_pix_fmts[] = { @@ -97,7 +100,12 @@ static int cudaupload_config_output(AVFilterLink *outlink) hwframe_ctx = (AVHWFramesContext*)s->hwframe->data; hwframe_ctx->format = AV_PIX_FMT_CUDA; - hwframe_ctx->sw_format = inlink->format; + if (inlink->hw_frames_ctx) { + AVHWFramesContext *in_hwframe_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; + hwframe_ctx->sw_format = in_hwframe_ctx->sw_format; + } else { + hwframe_ctx->sw_format = inlink->format; + } hwframe_ctx->width = inlink->w; hwframe_ctx->height = inlink->h; diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c new file mode 100644 index 0000000000000..7cedcc6e8885c --- /dev/null +++ b/libavfilter/vf_overlay_vulkan.c @@ -0,0 +1,463 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "vulkan.h" +#include "internal.h" +#include "framesync.h" + +#define CGROUPS (int [3]){ 32, 32, 1 } + +typedef struct OverlayVulkanContext { + VulkanFilterContext vkctx; + + int initialized; + VulkanPipeline *pl; + FFVkExecContext *exec; + FFFrameSync fs; + FFVkBuffer params_buf; + + /* Shader updators, must be in the main filter struct */ + VkDescriptorImageInfo main_images[3]; + VkDescriptorImageInfo overlay_images[3]; + VkDescriptorImageInfo output_images[3]; + VkDescriptorBufferInfo params_desc; + + int overlay_x; + int overlay_y; + int overlay_w; + int overlay_h; +} OverlayVulkanContext; + +static const char overlay_noalpha[] = { + C(0, void overlay_noalpha(int i, ivec2 pos) ) + C(0, { ) + C(1, if ((o_offset[i].x <= pos.x) && (o_offset[i].y <= pos.y) && + (pos.x < (o_offset[i].x + o_size[i].x)) && + (pos.y < (o_offset[i].y + o_size[i].y))) { ) + C(2, vec4 res = texture(overlay_img[i], pos - o_offset[i]); ) + C(2, imageStore(output_img[i], pos, res); ) + C(1, } else { ) + C(2, vec4 res = texture(main_img[i], pos); ) + C(2, imageStore(output_img[i], pos, res); ) + C(1, } ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int err; + OverlayVulkanContext *s = ctx->priv; + VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR); + if (!sampler) + return AVERROR_EXTERNAL; + + s->pl = ff_vk_create_pipeline(ctx); + if (!s->pl) + return AVERROR(ENOMEM); + + { /* Create the shader */ + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + VulkanDescriptorSetBinding desc_i[3] = { + { + .name = "main_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->main_images, + .samplers = DUP_SAMPLER_ARRAY4(*sampler), + }, + { + .name = "overlay_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->overlay_images, + .samplers = DUP_SAMPLER_ARRAY4(*sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->output_images, + }, + }; + + VulkanDescriptorSetBinding desc_b = { + .name = "params", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .mem_layout = "std430", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = &s->params_desc, + .buf_content = "ivec2 o_offset[3], o_size[3];", + }; + + SPIRVShader *shd = ff_vk_init_shader(ctx, s->pl, "overlay_compute", + VK_SHADER_STAGE_COMPUTE_BIT); + if (!shd) + return AVERROR(ENOMEM); + + ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS); + + RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, desc_i, 3, 0)); /* set 0 */ + RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */ + + GLSLD( overlay_noalpha ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, int planes = %i; ,planes); + GLSLC(1, for (int i = 0; i < planes; i++) { ); + GLSLC(2, overlay_noalpha(i, pos); ); + GLSLC(1, } ); + GLSLC(0, } ); + + RET(ff_vk_compile_shader(ctx, shd, "main")); + } + + RET(ff_vk_init_pipeline_layout(ctx, s->pl)); + RET(ff_vk_init_compute_pipeline(ctx, s->pl)); + + { /* Create and update buffer */ + const AVPixFmtDescriptor *desc; + + /* NOTE: std430 requires the same identical struct layout, padding and + * alignment as C, so we're allowed to do this, as this will map + * exactly to what the shader recieves */ + struct { + int32_t o_offset[2*3]; + int32_t o_size[2*3]; + } *par; + + err = ff_vk_create_buf(ctx, &s->params_buf, + sizeof(*par), + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err) + return err; + + err = ff_vk_map_buffers(ctx, &s->params_buf, (uint8_t **)&par, 1, 0); + if (err) + return err; + + desc = av_pix_fmt_desc_get(s->vkctx.output_format); + + par->o_offset[0] = s->overlay_x; + par->o_offset[1] = s->overlay_y; + par->o_offset[2] = par->o_offset[0] >> desc->log2_chroma_w; + par->o_offset[3] = par->o_offset[1] >> desc->log2_chroma_h; + par->o_offset[4] = par->o_offset[0] >> desc->log2_chroma_w; + par->o_offset[5] = par->o_offset[1] >> desc->log2_chroma_h; + + par->o_size[0] = s->overlay_w; + par->o_size[1] = s->overlay_h; + par->o_size[2] = par->o_size[0] >> desc->log2_chroma_w; + par->o_size[3] = par->o_size[1] >> desc->log2_chroma_h; + par->o_size[4] = par->o_size[0] >> desc->log2_chroma_w; + par->o_size[5] = par->o_size[1] >> desc->log2_chroma_h; + + err = ff_vk_unmap_buffers(ctx, &s->params_buf, 1, 1); + if (err) + return err; + + s->params_desc.buffer = s->params_buf.buf; + s->params_desc.range = VK_WHOLE_SIZE; + + ff_vk_update_descriptor_set(ctx, s->pl, 1); + } + + /* Execution context */ + RET(ff_vk_create_exec_ctx(ctx, &s->exec, + s->vkctx.hwctx->queue_family_comp_index)); + + s->initialized = 1; + + return 0; + +fail: + return err; +} + +static int process_frames(AVFilterContext *avctx, AVFrame *out_f, + AVFrame *main_f, AVFrame *overlay_f) +{ + int err; + OverlayVulkanContext *s = avctx->priv; + int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + AVVkFrame *out = (AVVkFrame *)out_f->data[0]; + AVVkFrame *main = (AVVkFrame *)main_f->data[0]; + AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0]; + + AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data; + AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data; + + for (int i = 0; i < planes; i++) { + RET(ff_vk_create_imageview(avctx, &s->main_images[i].imageView, main->img[i], + av_vkfmt_from_pixfmt(main_fc->sw_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->overlay_images[i].imageView, overlay->img[i], + av_vkfmt_from_pixfmt(overlay_fc->sw_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i], + av_vkfmt_from_pixfmt(s->vkctx.output_format)[i], + ff_comp_identity_map)); + + s->main_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + ff_vk_update_descriptor_set(avctx, s->pl, 0); + + ff_vk_start_exec_recording(avctx, s->exec); + + for (int i = 0; i < planes; i++) { + VkImageMemoryBarrier bar[3] = { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = main->layout[i], + .newLayout = s->main_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = main->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = overlay->layout[i], + .newLayout = s->overlay_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = overlay->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .oldLayout = out->layout[i], + .newLayout = s->output_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = out->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + }; + + vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, + 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); + + main->layout[i] = bar[0].newLayout; + main->access[i] = bar[0].dstAccessMask; + + overlay->layout[i] = bar[1].newLayout; + overlay->access[i] = bar[1].dstAccessMask; + + out->layout[i] = bar[2].newLayout; + out->access[i] = bar[2].dstAccessMask; + } + + ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl); + + vkCmdDispatch(s->exec->buf, + FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], + FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); + + ff_vk_add_exec_dep(avctx, s->exec, main_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + ff_vk_add_exec_dep(avctx, s->exec, overlay_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + ff_vk_add_exec_dep(avctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + + err = ff_vk_submit_exec_queue(avctx, s->exec); + if (err) + return err; + +fail: + + for (int i = 0; i < planes; i++) { + ff_vk_destroy_imageview(avctx, &s->main_images[i].imageView); + ff_vk_destroy_imageview(avctx, &s->overlay_images[i].imageView); + ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView); + } + + return err; +} + +static int overlay_vulkan_blend(FFFrameSync *fs) +{ + int err; + AVFilterContext *ctx = fs->parent; + OverlayVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *input_main, *input_overlay, *out; + + err = ff_framesync_get_frame(fs, 0, &input_main, 0); + if (err < 0) + goto fail; + err = ff_framesync_get_frame(fs, 1, &input_overlay, 0); + if (err < 0) + goto fail; + + if (!input_main || !input_overlay) + return 0; + + if (!s->initialized) { + AVHWFramesContext *main_fc = (AVHWFramesContext*)input_main->hw_frames_ctx->data; + AVHWFramesContext *overlay_fc = (AVHWFramesContext*)input_overlay->hw_frames_ctx->data; + if (main_fc->sw_format != overlay_fc->sw_format) { + av_log(ctx, AV_LOG_ERROR, "Mismatching sw formats!\n"); + return AVERROR(EINVAL); + } + + s->overlay_w = input_overlay->width; + s->overlay_h = input_overlay->height; + + RET(init_filter(ctx)); + } + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + RET(process_frames(ctx, out, input_main, input_overlay)); + + err = av_frame_copy_props(out, input_main); + if (err < 0) + goto fail; + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&out); + return err; +} + +static int overlay_vulkan_config_output(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + OverlayVulkanContext *s = avctx->priv; + + err = ff_vk_filter_config_output(outlink); + if (err < 0) + return err; + + err = ff_framesync_init_dualinput(&s->fs, avctx); + if (err < 0) + return err; + + return ff_framesync_configure(&s->fs); +} + +static int overlay_vulkan_activate(AVFilterContext *avctx) +{ + OverlayVulkanContext *s = avctx->priv; + + return ff_framesync_activate(&s->fs); +} + +static av_cold int overlay_vulkan_init(AVFilterContext *avctx) +{ + OverlayVulkanContext *s = avctx->priv; + + s->fs.on_event = &overlay_vulkan_blend; + + return ff_vk_filter_init(avctx); +} + +static void overlay_vulkan_uninit(AVFilterContext *avctx) +{ + OverlayVulkanContext *s = avctx->priv; + + ff_vk_filter_uninit(avctx); + ff_framesync_uninit(&s->fs); + + ff_vk_free_buf(avctx, &s->params_buf); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(OverlayVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption overlay_vulkan_options[] = { + { "x", "Set horizontal offset", OFFSET(overlay_x), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, + { "y", "Set vertical offset", OFFSET(overlay_y), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(overlay_vulkan); + +static const AVFilterPad overlay_vulkan_inputs[] = { + { + .name = "main", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_input, + }, + { + .name = "overlay", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_input, + }, + { NULL } +}; + +static const AVFilterPad overlay_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &overlay_vulkan_config_output, + }, + { NULL } +}; + +AVFilter ff_vf_overlay_vulkan = { + .name = "overlay_vulkan", + .description = NULL_IF_CONFIG_SMALL("Overlay a source on top of another"), + .priv_size = sizeof(OverlayVulkanContext), + .init = &overlay_vulkan_init, + .uninit = &overlay_vulkan_uninit, + .query_formats = &ff_vk_filter_query_formats, + .activate = &overlay_vulkan_activate, + .inputs = overlay_vulkan_inputs, + .outputs = overlay_vulkan_outputs, + .priv_class = &overlay_vulkan_class, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c new file mode 100644 index 0000000000000..1534f2d716113 --- /dev/null +++ b/libavfilter/vf_scale_vulkan.c @@ -0,0 +1,352 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "vulkan.h" +#include "scale_eval.h" +#include "internal.h" + +#define CGROUPS (int [3]){ 32, 32, 1 } + +enum ScalerFunc { + F_BILINEAR = 0, + F_NEAREST, + + F_NB, +}; + +typedef struct ScaleVulkanContext { + VulkanFilterContext vkctx; + + int initialized; + FFVkExecContext *exec; + VulkanPipeline *pl; + + /* Shader updators, must be in the main filter struct */ + VkDescriptorImageInfo input_images[3]; + VkDescriptorImageInfo output_images[3]; + + enum ScalerFunc scaler; + char *output_format_string; + char *w_expr; + char *h_expr; +} ScaleVulkanContext; + +static const char scale_bilinear[] = { + C(0, void scale_bilinear(int idx, ivec2 pos) ) + C(0, { ) + C(1, const vec2 npos = (vec2(pos) + 0.5f) / imageSize(output_img[idx]); ) + C(1, imageStore(output_img[idx], pos, texture(input_img[idx], npos)); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) +{ + int err; + VkSampler *sampler; + VkFilter sampler_mode; + ScaleVulkanContext *s = ctx->priv; + + switch (s->scaler) { + case F_NEAREST: + sampler_mode = VK_FILTER_NEAREST; + break; + case F_BILINEAR: + sampler_mode = VK_FILTER_LINEAR; + break; + }; + + /* Create a sampler */ + sampler = ff_vk_init_sampler(ctx, 0, sampler_mode); + if (!sampler) + return AVERROR_EXTERNAL; + + s->pl = ff_vk_create_pipeline(ctx); + if (!s->pl) + return AVERROR(ENOMEM); + + { /* Create the shader */ + VulkanDescriptorSetBinding desc_i[2] = { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.input_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->input_images, + .samplers = DUP_SAMPLER_ARRAY4(*sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.output_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .updater = s->output_images, + }, + }; + + SPIRVShader *shd = ff_vk_init_shader(ctx, s->pl, "scale_compute", + VK_SHADER_STAGE_COMPUTE_BIT); + if (!shd) + return AVERROR(ENOMEM); + + ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS); + + RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, desc_i, 2, 0)); /* set 0 */ + + GLSLD( scale_bilinear ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + + for (int i = 0; i < desc_i[1].elems; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) ); + switch (s->scaler) { + case F_NEAREST: + case F_BILINEAR: + GLSLF(2, scale_bilinear(%i, pos); ,i); + break; + }; + } + + GLSLC(0, } ); + + RET(ff_vk_compile_shader(ctx, shd, "main")); + } + + RET(ff_vk_init_pipeline_layout(ctx, s->pl)); + RET(ff_vk_init_compute_pipeline(ctx, s->pl)); + + /* Execution context */ + RET(ff_vk_create_exec_ctx(ctx, &s->exec, + s->vkctx.hwctx->queue_family_comp_index)); + + s->initialized = 1; + + return 0; + +fail: + return err; +} + +static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) +{ + int err = 0; + ScaleVulkanContext *s = avctx->priv; + AVVkFrame *in = (AVVkFrame *)in_f->data[0]; + AVVkFrame *out = (AVVkFrame *)out_f->data[0]; + int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + for (int i = 0; i < planes; i++) { + RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in->img[i], + av_vkfmt_from_pixfmt(s->vkctx.input_format)[i], + ff_comp_identity_map)); + + RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out->img[i], + av_vkfmt_from_pixfmt(s->vkctx.output_format)[i], + ff_comp_identity_map)); + + s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + ff_vk_update_descriptor_set(avctx, s->pl, 0); + + ff_vk_start_exec_recording(avctx, s->exec); + + for (int i = 0; i < planes; i++) { + VkImageMemoryBarrier bar[2] = { + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .oldLayout = in->layout[i], + .newLayout = s->input_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = in->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .oldLayout = out->layout[i], + .newLayout = s->output_images[i].imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = out->img[i], + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }, + }; + + vkCmdPipelineBarrier(s->exec->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, + 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); + + in->layout[i] = bar[0].newLayout; + in->access[i] = bar[0].dstAccessMask; + + out->layout[i] = bar[1].newLayout; + out->access[i] = bar[1].dstAccessMask; + } + + ff_vk_bind_pipeline_exec(avctx, s->exec, s->pl); + + vkCmdDispatch(s->exec->buf, + FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], + FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); + + ff_vk_add_exec_dep(avctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + ff_vk_add_exec_dep(avctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + + err = ff_vk_submit_exec_queue(avctx, s->exec); + if (err) + return err; + + for (int i = 0; i < planes; i++) { + ff_vk_destroy_imageview(avctx, &s->input_images[i].imageView); + ff_vk_destroy_imageview(avctx, &s->output_images[i].imageView); + } + +fail: + return err; +} + +static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFilterContext *ctx = link->dst; + ScaleVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in)); + + RET(process_frames(ctx, out, in)); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static int scale_vulkan_config_output(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + ScaleVulkanContext *s = avctx->priv; + AVFilterLink *inlink = outlink->src->inputs[0]; + + err = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink, + &s->vkctx.output_width, + &s->vkctx.output_height); + if (err < 0) + return err; + + s->vkctx.output_format = s->vkctx.input_format; + + err = ff_vk_filter_config_output(outlink); + if (err < 0) + return err; + + if (inlink->sample_aspect_ratio.num) + outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio); + else + outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + + return 0; +} + +static void scale_vulkan_uninit(AVFilterContext *avctx) +{ + ScaleVulkanContext *s = avctx->priv; + + ff_vk_filter_uninit(avctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(ScaleVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption scale_vulkan_options[] = { + { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, + { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, + { "scaler", "Scaler function", OFFSET(scaler), AV_OPT_TYPE_INT, {.i64 = F_BILINEAR}, 0, F_NB, .flags = FLAGS, "scaler" }, + { "bilinear", "Bilinear interpolation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = F_BILINEAR}, 0, 0, .flags = FLAGS, "scaler" }, + { "nearest", "Nearest (useful for pixel art)", 0, AV_OPT_TYPE_CONST, {.i64 = F_NEAREST}, 0, 0, .flags = FLAGS, "scaler" }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(scale_vulkan); + +static const AVFilterPad scale_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &scale_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, + { NULL } +}; + +static const AVFilterPad scale_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &scale_vulkan_config_output, + }, + { NULL } +}; + +AVFilter ff_vf_scale_vulkan = { + .name = "scale_vulkan", + .description = NULL_IF_CONFIG_SMALL("Scale Vulkan frames"), + .priv_size = sizeof(ScaleVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &scale_vulkan_uninit, + .query_formats = &ff_vk_filter_query_formats, + .inputs = scale_vulkan_inputs, + .outputs = scale_vulkan_outputs, + .priv_class = &scale_vulkan_class, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c new file mode 100644 index 0000000000000..99aaeb2ef42d1 --- /dev/null +++ b/libavfilter/vulkan.c @@ -0,0 +1,1221 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "formats.h" +#include "vulkan.h" +#include "glslang.h" + +/* Generic macro for creating contexts which need to keep their addresses + * if another context is created. */ +#define FN_CREATING(ctx, type, shortname, array, num) \ +static av_always_inline type *create_ ##shortname(ctx *dctx) \ +{ \ + type **array, *sctx = av_mallocz(sizeof(*sctx)); \ + if (!sctx) \ + return NULL; \ + \ + array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\ + if (!array) { \ + av_free(sctx); \ + return NULL; \ + } \ + \ + dctx->array = array; \ + dctx->array[dctx->num++] = sctx; \ + \ + return sctx; \ +} + +const VkComponentMapping ff_comp_identity_map = { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, +}; + +/* Converts return values to strings */ +const char *ff_vk_ret2str(VkResult res) +{ +#define CASE(VAL) case VAL: return #VAL + switch (res) { + CASE(VK_SUCCESS); + CASE(VK_NOT_READY); + CASE(VK_TIMEOUT); + CASE(VK_EVENT_SET); + CASE(VK_EVENT_RESET); + CASE(VK_INCOMPLETE); + CASE(VK_ERROR_OUT_OF_HOST_MEMORY); + CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); + CASE(VK_ERROR_INITIALIZATION_FAILED); + CASE(VK_ERROR_DEVICE_LOST); + CASE(VK_ERROR_MEMORY_MAP_FAILED); + CASE(VK_ERROR_LAYER_NOT_PRESENT); + CASE(VK_ERROR_EXTENSION_NOT_PRESENT); + CASE(VK_ERROR_FEATURE_NOT_PRESENT); + CASE(VK_ERROR_INCOMPATIBLE_DRIVER); + CASE(VK_ERROR_TOO_MANY_OBJECTS); + CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); + CASE(VK_ERROR_FRAGMENTED_POOL); + CASE(VK_ERROR_SURFACE_LOST_KHR); + CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); + CASE(VK_SUBOPTIMAL_KHR); + CASE(VK_ERROR_OUT_OF_DATE_KHR); + CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); + CASE(VK_ERROR_VALIDATION_FAILED_EXT); + CASE(VK_ERROR_INVALID_SHADER_NV); + CASE(VK_ERROR_OUT_OF_POOL_MEMORY); + CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); + CASE(VK_ERROR_NOT_PERMITTED_EXT); + default: return "Unknown error"; + } +#undef CASE +} + +static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) +{ + VkResult ret; + int index = -1; + VkPhysicalDeviceProperties props; + VkPhysicalDeviceMemoryProperties mprops; + VulkanFilterContext *s = avctx->priv; + + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = alloc_extension, + }; + + vkGetPhysicalDeviceProperties(s->hwctx->phys_dev, &props); + vkGetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops); + + /* Align if we need to */ + if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment); + + alloc_info.allocationSize = req->size; + + /* The vulkan spec requires memory types to be sorted in the "optimal" + * order, so the first matching type we find will be the best/fastest one */ + for (int i = 0; i < mprops.memoryTypeCount; i++) { + /* The memory type must be supported by the requirements (bitfield) */ + if (!(req->memoryTypeBits & (1 << i))) + continue; + + /* The memory type flags must include our properties */ + if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) + continue; + + /* Found a suitable memory type */ + index = i; + break; + } + + if (index < 0) { + av_log(avctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", + req_flags); + return AVERROR(EINVAL); + } + + alloc_info.memoryTypeIndex = index; + + ret = vkAllocateMemory(s->hwctx->act_dev, &alloc_info, + s->hwctx->alloc, mem); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR(ENOMEM); + } + + *mem_flags |= mprops.memoryTypes[index].propertyFlags; + + return 0; +} + +int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) +{ + int err; + VkResult ret; + VkMemoryRequirements req; + VulkanFilterContext *s = avctx->priv; + + VkBufferCreateInfo buf_spawn = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = NULL, + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .size = size, /* Gets FFALIGNED during alloc if host visible + but should be ok */ + }; + + ret = vkCreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + vkGetBufferMemoryRequirements(s->hwctx->act_dev, buf->buf, &req); + + err = vk_alloc_mem(avctx, &req, flags, NULL, &buf->flags, &buf->mem); + if (err) + return err; + + ret = vkBindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[], + int nb_buffers, int invalidate) +{ + VkResult ret; + VulkanFilterContext *s = avctx->priv; + VkMappedMemoryRange *inval_list = NULL; + int inval_count = 0; + + for (int i = 0; i < nb_buffers; i++) { + ret = vkMapMemory(s->hwctx->act_dev, buf[i].mem, 0, + VK_WHOLE_SIZE, 0, (void **)&mem[i]); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + if (!invalidate) + return 0; + + for (int i = 0; i < nb_buffers; i++) { + const VkMappedMemoryRange ival_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = buf[i].mem, + .size = VK_WHOLE_SIZE, + }; + if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + continue; + inval_list = av_fast_realloc(s->scratch, &s->scratch_size, + (++inval_count)*sizeof(*inval_list)); + if (!inval_list) + return AVERROR(ENOMEM); + inval_list[inval_count - 1] = ival_buf; + } + + if (inval_count) { + ret = vkInvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count, + inval_list); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + return 0; +} + +int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers, + int flush) +{ + int err = 0; + VkResult ret; + VulkanFilterContext *s = avctx->priv; + VkMappedMemoryRange *flush_list = NULL; + int flush_count = 0; + + if (flush) { + for (int i = 0; i < nb_buffers; i++) { + const VkMappedMemoryRange flush_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = buf[i].mem, + .size = VK_WHOLE_SIZE, + }; + if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + continue; + flush_list = av_fast_realloc(s->scratch, &s->scratch_size, + (++flush_count)*sizeof(*flush_list)); + if (!flush_list) + return AVERROR(ENOMEM); + flush_list[flush_count - 1] = flush_buf; + } + } + + if (flush_count) { + ret = vkFlushMappedMemoryRanges(s->hwctx->act_dev, flush_count, + flush_list); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ + } + } + + for (int i = 0; i < nb_buffers; i++) + vkUnmapMemory(s->hwctx->act_dev, buf[i].mem); + + return err; +} + +void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf) +{ + VulkanFilterContext *s = avctx->priv; + if (!buf) + return; + + if (buf->buf != VK_NULL_HANDLE) + vkDestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); + if (buf->mem != VK_NULL_HANDLE) + vkFreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); +} + +int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl, + int offset, int size, VkShaderStageFlagBits stage) +{ + VkPushConstantRange *pc; + + pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), + pl->push_consts_num + 1); + if (!pl->push_consts) + return AVERROR(ENOMEM); + + pc = &pl->push_consts[pl->push_consts_num++]; + memset(pc, 0, sizeof(*pc)); + + pc->stageFlags = stage; + pc->offset = offset; + pc->size = size; + + return 0; +} + +FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num) +int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue) +{ + VkResult ret; + FFVkExecContext *e; + VulkanFilterContext *s = avctx->priv; + + VkCommandPoolCreateInfo cqueue_create = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = queue, + }; + VkCommandBufferAllocateInfo cbuf_create = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }; + VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO }; + + e = create_exec_ctx(s); + if (!e) + return AVERROR(ENOMEM); + + ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create, + s->hwctx->alloc, &e->pool); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n", + ff_vk_ret2str(ret)); + return 1; + } + + cbuf_create.commandPool = e->pool; + + ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, &e->buf); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", + ff_vk_ret2str(ret)); + return 1; + } + + ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn, + s->hwctx->alloc, &e->fence); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n", + ff_vk_ret2str(ret)); + return 1; + } + + vkGetDeviceQueue(s->hwctx->act_dev, queue, 0, &e->queue); + + *ctx = e; + + return 0; +} + +int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e) +{ + VkResult ret; + VkCommandBufferBeginInfo cmd_start = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + + e->sem_wait_cnt = 0; + e->sem_sig_cnt = 0; + + ret = vkBeginCommandBuffer(e->buf, &cmd_start); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to start command recoding: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e, + AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag) +{ + AVVkFrame *f = (AVVkFrame *)frame->data[0]; + AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; + int planes = av_pix_fmt_count_planes(fc->sw_format); + + for (int i = 0; i < planes; i++) { + e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, + (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); + if (!e->sem_wait) + return AVERROR(ENOMEM); + + e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, + (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst)); + if (!e->sem_wait_dst) + return AVERROR(ENOMEM); + + e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, + (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig)); + if (!e->sem_sig) + return AVERROR(ENOMEM); + + e->sem_wait[e->sem_wait_cnt] = f->sem[i]; + e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; + e->sem_wait_cnt++; + + e->sem_sig[e->sem_sig_cnt] = f->sem[i]; + e->sem_sig_cnt++; + } + + return 0; +} + +int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e) +{ + VkResult ret; + VulkanFilterContext *s = avctx->priv; + + VkSubmitInfo s_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &e->buf, + + .pWaitSemaphores = e->sem_wait, + .pWaitDstStageMask = e->sem_wait_dst, + .waitSemaphoreCount = e->sem_wait_cnt, + + .pSignalSemaphores = e->sem_sig, + .signalSemaphoreCount = e->sem_sig_cnt, + }; + + vkEndCommandBuffer(e->buf); + + ret = vkQueueSubmit(e->queue, 1, &s_info, e->fence); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + vkWaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vkResetFences(s->hwctx->act_dev, 1, &e->fence); + + return 0; +} + +int ff_vk_filter_query_formats(AVFilterContext *avctx) +{ + static const enum AVPixelFormat pixel_formats[] = { + AV_PIX_FMT_VULKAN, AV_PIX_FMT_NONE, + }; + AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); + if (!pix_fmts) + return AVERROR(ENOMEM); + + return ff_set_common_formats(avctx, pix_fmts); +} + +static int vulkan_filter_set_device(AVFilterContext *avctx, + AVBufferRef *device) +{ + VulkanFilterContext *s = avctx->priv; + + av_buffer_unref(&s->device_ref); + + s->device_ref = av_buffer_ref(device); + if (!s->device_ref) + return AVERROR(ENOMEM); + + s->device = (AVHWDeviceContext*)s->device_ref->data; + s->hwctx = s->device->hwctx; + + return 0; +} + +static int vulkan_filter_set_frames(AVFilterContext *avctx, + AVBufferRef *frames) +{ + VulkanFilterContext *s = avctx->priv; + + av_buffer_unref(&s->frames_ref); + + s->frames_ref = av_buffer_ref(frames); + if (!s->frames_ref) + return AVERROR(ENOMEM); + + return 0; +} + +int ff_vk_filter_config_input(AVFilterLink *inlink) +{ + int err; + AVFilterContext *avctx = inlink->dst; + VulkanFilterContext *s = avctx->priv; + AVHWFramesContext *input_frames; + + if (!inlink->hw_frames_ctx) { + av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " + "hardware frames context on the input.\n"); + return AVERROR(EINVAL); + } + + /* Extract the device and default output format from the first input. */ + if (avctx->inputs[0] != inlink) + return 0; + + input_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data; + if (input_frames->format != AV_PIX_FMT_VULKAN) + return AVERROR(EINVAL); + + err = vulkan_filter_set_device(avctx, input_frames->device_ref); + if (err < 0) + return err; + err = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx); + if (err < 0) + return err; + + /* Default output parameters match input parameters. */ + s->input_format = input_frames->sw_format; + if (s->output_format == AV_PIX_FMT_NONE) + s->output_format = input_frames->sw_format; + if (!s->output_width) + s->output_width = inlink->w; + if (!s->output_height) + s->output_height = inlink->h; + + return 0; +} + +int ff_vk_filter_config_output_inplace(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + VulkanFilterContext *s = avctx->priv; + + av_buffer_unref(&outlink->hw_frames_ctx); + + if (!s->device_ref) { + if (!avctx->hw_device_ctx) { + av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " + "Vulkan device.\n"); + return AVERROR(EINVAL); + } + + err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); + if (err < 0) + return err; + } + + outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref); + outlink->w = s->output_width; + outlink->h = s->output_height; + + return 0; +} + +int ff_vk_filter_config_output(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + VulkanFilterContext *s = avctx->priv; + AVBufferRef *output_frames_ref; + AVHWFramesContext *output_frames; + + av_buffer_unref(&outlink->hw_frames_ctx); + + if (!s->device_ref) { + if (!avctx->hw_device_ctx) { + av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " + "Vulkan device.\n"); + return AVERROR(EINVAL); + } + + err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); + if (err < 0) + return err; + } + + output_frames_ref = av_hwframe_ctx_alloc(s->device_ref); + if (!output_frames_ref) { + err = AVERROR(ENOMEM); + goto fail; + } + output_frames = (AVHWFramesContext*)output_frames_ref->data; + + output_frames->format = AV_PIX_FMT_VULKAN; + output_frames->sw_format = s->output_format; + output_frames->width = s->output_width; + output_frames->height = s->output_height; + + err = av_hwframe_ctx_init(output_frames_ref); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to initialise output " + "frames: %d.\n", err); + goto fail; + } + + outlink->hw_frames_ctx = output_frames_ref; + outlink->w = s->output_width; + outlink->h = s->output_height; + + return 0; +fail: + av_buffer_unref(&output_frames_ref); + return err; +} + +int ff_vk_filter_init(AVFilterContext *avctx) +{ + VulkanFilterContext *s = avctx->priv; + + s->output_format = AV_PIX_FMT_NONE; + + if (!glslang_init()) + return AVERROR_EXTERNAL; + + return 0; +} + +FN_CREATING(VulkanFilterContext, VkSampler, sampler, samplers, samplers_num) +VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, + VkFilter filt) +{ + VkResult ret; + VulkanFilterContext *s = avctx->priv; + + VkSamplerCreateInfo sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = filt, + .minFilter = sampler_info.magFilter, + .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST : + VK_SAMPLER_MIPMAP_MODE_LINEAR, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeV = sampler_info.addressModeU, + .addressModeW = sampler_info.addressModeU, + .anisotropyEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, + .unnormalizedCoordinates = unnorm_coords, + }; + + VkSampler *sampler = create_sampler(s); + if (!sampler) + return NULL; + + ret = vkCreateSampler(s->hwctx->act_dev, &sampler_info, + s->hwctx->alloc, sampler); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n", + ff_vk_ret2str(ret)); + return NULL; + } + + return sampler; +} + +const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt); + const int high = desc->comp[0].depth > 8; + return high ? "rgba16f" : "rgba8"; +} + +int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img, + VkFormat fmt, const VkComponentMapping map) +{ + VulkanFilterContext *s = avctx->priv; + VkImageViewCreateInfo imgview_spawn = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = NULL, + .image = img, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = fmt, + .components = map, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + + VkResult ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn, + s->hwctx->alloc, v); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v) +{ + VulkanFilterContext *s = avctx->priv; + if (v && *v) { + vkDestroyImageView(s->hwctx->act_dev, *v, s->hwctx->alloc); + *v = NULL; + } +} + +FN_CREATING(VulkanPipeline, SPIRVShader, shader, shaders, shaders_num) +SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl, + const char *name, VkShaderStageFlags stage) +{ + SPIRVShader *shd = create_shader(pl); + if (!shd) + return NULL; + + av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); + + shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shd->shader.stage = stage; + + shd->name = name; + + GLSLF(0, #version %i ,460); + GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) ); + GLSLC(0, ); + + return shd; +} + +void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd, + int local_size[3]) +{ + shd->local_size[0] = local_size[0]; + shd->local_size[1] = local_size[1]; + shd->local_size[2] = local_size[2]; + + av_bprintf(&shd->src, "layout (local_size_x = %i, " + "local_size_y = %i, local_size_z = %i) in;\n\n", + shd->local_size[0], shd->local_size[1], shd->local_size[2]); +} + +static void print_shader(AVFilterContext *avctx, SPIRVShader *shd) +{ + int line = 0; + const char *p = shd->src.str; + const char *start = p; + + AVBPrint buf; + av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); + + for (int i = 0; i < strlen(p); i++) { + if (p[i] == '\n') { + av_bprintf(&buf, "%i\t", ++line); + av_bprint_append_data(&buf, start, &p[i] - start + 1); + start = &p[i + 1]; + } + } + + av_log(avctx, AV_LOG_VERBOSE, "Compiling shader %s: \n%s", + shd->name, buf.str); + av_bprint_finalize(&buf, NULL); +} + +int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd, + const char *entrypoint) +{ + VkResult ret; + VulkanFilterContext *s = avctx->priv; + VkShaderModuleCreateInfo shader_create; + GLSlangResult *res; + + static const enum GLSlangStage emap[] = { + [VK_SHADER_STAGE_VERTEX_BIT] = GLSLANG_VERTEX, + [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_FRAGMENT, + [VK_SHADER_STAGE_COMPUTE_BIT] = GLSLANG_COMPUTE, + }; + + shd->shader.pName = entrypoint; + + print_shader(avctx, shd); + + res = glslang_compile(shd->src.str, emap[shd->shader.stage]); + + av_bprint_finalize(&shd->src, NULL); + + if (!res->success) { + av_log(avctx, AV_LOG_ERROR, "%s", res->error_msg); + return AVERROR_EXTERNAL; + } + + shader_create.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + shader_create.pNext = NULL; + shader_create.codeSize = res->size; + shader_create.flags = 0; + shader_create.pCode = res->data; + + ret = vkCreateShaderModule(s->hwctx->act_dev, &shader_create, NULL, + &shd->shader.module); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + av_log(avctx, AV_LOG_VERBOSE, "Shader linked! Size: %zu bytes\n", + shader_create.codeSize); + + return 0; +} + +static const struct descriptor_props { + size_t struct_size; /* Size of the opaque which updates the descriptor */ + const char *type; + int is_uniform; + int mem_quali; /* Can use a memory qualifier */ + int dim_needed; /* Must indicate dimension */ + int buf_content; /* Must indicate buffer contents */ +} descriptor_props[] = { + [VK_DESCRIPTOR_TYPE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 0, 0, }, + [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = { sizeof(VkDescriptorImageInfo), "texture", 1, 0, 1, 0, }, + [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = { sizeof(VkDescriptorImageInfo), "image", 1, 1, 1, 0, }, + [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = { sizeof(VkDescriptorImageInfo), "subpassInput", 1, 0, 0, 0, }, + [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 1, 0, }, + [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, + [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, }, + [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, + [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, }, + [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = { sizeof(VkBufferView), "samplerBuffer", 1, 0, 0, 0, }, + [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, +}; + +int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, + SPIRVShader *shd, VulkanDescriptorSetBinding *desc, + int num, int only_print_to_shader) +{ + VkResult ret; + VkDescriptorSetLayout *layout; + VulkanFilterContext *s = avctx->priv; + + if (only_print_to_shader) + goto print; + + pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout), + pl->descriptor_sets_num + 1); + if (!pl->desc_layout) + return AVERROR(ENOMEM); + + layout = &pl->desc_layout[pl->descriptor_sets_num]; + memset(layout, 0, sizeof(*layout)); + + { /* Create descriptor set layout descriptions */ + VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 }; + VkDescriptorSetLayoutBinding *desc_binding; + + desc_binding = av_mallocz(sizeof(*desc_binding)*num); + if (!desc_binding) + return AVERROR(ENOMEM); + + for (int i = 0; i < num; i++) { + desc_binding[i].binding = i; + desc_binding[i].descriptorType = desc[i].type; + desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1); + desc_binding[i].stageFlags = desc[i].stages; + desc_binding[i].pImmutableSamplers = desc[i].samplers; + } + + desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + desc_create_layout.pBindings = desc_binding; + desc_create_layout.bindingCount = num; + + ret = vkCreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, + s->hwctx->alloc, layout); + av_free(desc_binding); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set " + "layout: %s\n", ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + { /* Pool each descriptor by type and update pool counts */ + for (int i = 0; i < num; i++) { + int j; + for (j = 0; j < pl->pool_size_desc_num; j++) + if (pl->pool_size_desc[j].type == desc[i].type) + break; + if (j >= pl->pool_size_desc_num) { + pl->pool_size_desc = av_realloc_array(pl->pool_size_desc, + sizeof(*pl->pool_size_desc), + ++pl->pool_size_desc_num); + if (!pl->pool_size_desc) + return AVERROR(ENOMEM); + memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize)); + } + pl->pool_size_desc[j].type = desc[i].type; + pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1); + } + } + + { /* Create template creation struct */ + VkDescriptorUpdateTemplateCreateInfo *dt; + VkDescriptorUpdateTemplateEntry *des_entries; + + /* Freed after descriptor set initialization */ + des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry)); + if (!des_entries) + return AVERROR(ENOMEM); + + for (int i = 0; i < num; i++) { + des_entries[i].dstBinding = i; + des_entries[i].descriptorType = desc[i].type; + des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1); + des_entries[i].dstArrayElement = 0; + des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s; + des_entries[i].stride = descriptor_props[desc[i].type].struct_size; + } + + pl->desc_template_info = av_realloc_array(pl->desc_template_info, + sizeof(*pl->desc_template_info), + pl->descriptor_sets_num + 1); + if (!pl->desc_template_info) + return AVERROR(ENOMEM); + + dt = &pl->desc_template_info[pl->descriptor_sets_num]; + memset(dt, 0, sizeof(*dt)); + + dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; + dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET; + dt->descriptorSetLayout = *layout; + dt->pDescriptorUpdateEntries = des_entries; + dt->descriptorUpdateEntryCount = num; + } + + pl->descriptor_sets_num++; + +print: + /* Write shader info */ + for (int i = 0; i < num; i++) { + const struct descriptor_props *prop = &descriptor_props[desc[i].type]; + GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i); + + if (desc[i].mem_layout) + GLSLA(", %s", desc[i].mem_layout); + GLSLA(")"); + + if (prop->is_uniform) + GLSLA(" uniform"); + + if (prop->mem_quali && desc[i].mem_quali) + GLSLA(" %s", desc[i].mem_quali); + + if (prop->type) + GLSLA(" %s", prop->type); + + if (prop->dim_needed) + GLSLA("%iD", desc[i].dimensions); + + GLSLA(" %s", desc[i].name); + + if (prop->buf_content) + GLSLA(" {\n %s\n}", desc[i].buf_content); + else if (desc[i].elems > 0) + GLSLA("[%i]", desc[i].elems); + + GLSLA(";\n"); + } + GLSLA("\n"); + + return 0; +} + +void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, + int set_id) +{ + VulkanFilterContext *s = avctx->priv; + + vkUpdateDescriptorSetWithTemplate(s->hwctx->act_dev, + pl->desc_set[set_id], + pl->desc_template[set_id], s); +} + +void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e, + VkShaderStageFlagBits stage, int offset, + size_t size, void *src) +{ + vkCmdPushConstants(e->buf, e->bound_pl->pipeline_layout, + stage, offset, size, src); +} + +int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl) +{ + VkResult ret; + VulkanFilterContext *s = avctx->priv; + + { /* Init descriptor set pool */ + VkDescriptorPoolCreateInfo pool_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .poolSizeCount = pl->pool_size_desc_num, + .pPoolSizes = pl->pool_size_desc, + .maxSets = pl->descriptor_sets_num, + }; + + ret = vkCreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, + s->hwctx->alloc, &pl->desc_pool); + av_freep(&pl->pool_size_desc); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set " + "pool: %s\n", ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + { /* Allocate descriptor sets */ + VkDescriptorSetAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = pl->desc_pool, + .descriptorSetCount = pl->descriptor_sets_num, + .pSetLayouts = pl->desc_layout, + }; + + pl->desc_set = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_set)); + if (!pl->desc_set) + return AVERROR(ENOMEM); + + ret = vkAllocateDescriptorSets(s->hwctx->act_dev, &alloc_info, + pl->desc_set); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + { /* Finally create the pipeline layout */ + VkPipelineLayoutCreateInfo spawn_pipeline_layout = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = pl->descriptor_sets_num, + .pSetLayouts = pl->desc_layout, + .pushConstantRangeCount = pl->push_consts_num, + .pPushConstantRanges = pl->push_consts, + }; + + ret = vkCreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout, + s->hwctx->alloc, &pl->pipeline_layout); + av_freep(&pl->push_consts); + pl->push_consts_num = 0; + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + { /* Descriptor template (for tightly packed descriptors) */ + VkDescriptorUpdateTemplateCreateInfo *desc_template_info; + + pl->desc_template = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_template)); + if (!pl->desc_template) + return AVERROR(ENOMEM); + + /* Create update templates for the descriptor sets */ + for (int i = 0; i < pl->descriptor_sets_num; i++) { + desc_template_info = &pl->desc_template_info[i]; + desc_template_info->pipelineLayout = pl->pipeline_layout; + ret = vkCreateDescriptorUpdateTemplate(s->hwctx->act_dev, + desc_template_info, + s->hwctx->alloc, + &pl->desc_template[i]); + av_free((void *)desc_template_info->pDescriptorUpdateEntries); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor " + "template: %s\n", ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + av_freep(&pl->desc_template_info); + } + + return 0; +} + +FN_CREATING(VulkanFilterContext, VulkanPipeline, pipeline, pipelines, pipelines_num) +VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx) +{ + return create_pipeline(avctx->priv); +} + +int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl) +{ + int i; + VkResult ret; + VulkanFilterContext *s = avctx->priv; + + VkComputePipelineCreateInfo pipe = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .layout = pl->pipeline_layout, + }; + + for (i = 0; i < pl->shaders_num; i++) { + if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) { + pipe.stage = pl->shaders[i]->shader; + break; + } + } + if (i == pl->shaders_num) { + av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n"); + return AVERROR(EINVAL); + } + + ret = vkCreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe, + s->hwctx->alloc, &pl->pipeline); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + + return 0; +} + +void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e, + VulkanPipeline *pl) +{ + vkCmdBindPipeline(e->buf, pl->bind_point, pl->pipeline); + + vkCmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout, 0, + pl->descriptor_sets_num, pl->desc_set, 0, 0); + + e->bound_pl = pl; +} + +static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e) +{ + vkDestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc); + + if (e->buf != VK_NULL_HANDLE) + vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, 1, &e->buf); + if (e->pool != VK_NULL_HANDLE) + vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); + + av_free(e->sem_wait); + av_free(e->sem_wait_dst); + av_free(e->sem_sig); + + av_free(e); +} + +static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl) +{ + for (int i = 0; i < pl->shaders_num; i++) { + SPIRVShader *shd = pl->shaders[i]; + vkDestroyShaderModule(s->hwctx->act_dev, shd->shader.module, + s->hwctx->alloc); + av_free(shd); + } + + vkDestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); + vkDestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, + s->hwctx->alloc); + + for (int i = 0; i < pl->descriptor_sets_num; i++) { + vkDestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i], + s->hwctx->alloc); + vkDestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i], + s->hwctx->alloc); + } + + /* Also frees the descriptor sets */ + vkDestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool, + s->hwctx->alloc); + + av_freep(&pl->desc_set); + av_freep(&pl->shaders); + av_freep(&pl->desc_layout); + av_freep(&pl->desc_template); + av_freep(&pl->push_consts); + pl->push_consts_num = 0; + + /* Only freed in case of failure */ + av_freep(&pl->pool_size_desc); + if (pl->desc_template_info) { + for (int i = 0; i < pl->descriptor_sets_num; i++) + av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries); + av_freep(&pl->desc_template_info); + } + + av_free(pl); +} + +void ff_vk_filter_uninit(AVFilterContext *avctx) +{ + VulkanFilterContext *s = avctx->priv; + + glslang_uninit(); + + for (int i = 0; i < s->samplers_num; i++) + vkDestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc); + av_freep(&s->samplers); + + for (int i = 0; i < s->pipelines_num; i++) + free_pipeline(s, s->pipelines[i]); + av_freep(&s->pipelines); + + for (int i = 0; i < s->exec_ctx_num; i++) + free_exec_ctx(s, s->exec_ctx[i]); + av_freep(&s->exec_ctx); + + av_freep(&s->scratch); + s->scratch_size = 0; + + av_buffer_unref(&s->device_ref); + av_buffer_unref(&s->frames_ref); +} diff --git a/libavfilter/vulkan.h b/libavfilter/vulkan.h new file mode 100644 index 0000000000000..8d4def1a004ab --- /dev/null +++ b/libavfilter/vulkan.h @@ -0,0 +1,323 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_VULKAN_COMMON_H +#define AVFILTER_VULKAN_COMMON_H + +#include "avfilter.h" +#include "libavutil/pixdesc.h" +#include "libavutil/bprint.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_vulkan.h" + +/* GLSL management macros */ +#define INDENT(N) INDENT_##N +#define INDENT_0 +#define INDENT_1 INDENT_0 " " +#define INDENT_2 INDENT_1 INDENT_1 +#define INDENT_3 INDENT_2 INDENT_1 +#define INDENT_4 INDENT_3 INDENT_1 +#define INDENT_5 INDENT_4 INDENT_1 +#define INDENT_6 INDENT_5 INDENT_1 +#define C(N, S) INDENT(N) #S "\n" +#define GLSLC(N, S) av_bprintf(&shd->src, C(N, S)) +#define GLSLA(...) av_bprintf(&shd->src, __VA_ARGS__) +#define GLSLF(N, S, ...) av_bprintf(&shd->src, C(N, S), __VA_ARGS__) +#define GLSLD(D) GLSLC(0, ); \ + av_bprint_append_data(&shd->src, D, strlen(D)); \ + GLSLC(0, ) + +/* Helper, pretty much every Vulkan return value needs to be checked */ +#define RET(x) \ + do { \ + if ((err = (x)) < 0) \ + goto fail; \ + } while (0) + +/* Useful for attaching immutable samplers to arrays */ +#define DUP_SAMPLER_ARRAY4(x) (VkSampler []){ x, x, x, x, } + +typedef struct SPIRVShader { + const char *name; /* Name for id/debugging purposes */ + AVBPrint src; + int local_size[3]; /* Compute shader workgroup sizes */ + VkPipelineShaderStageCreateInfo shader; +} SPIRVShader; + +typedef struct VulkanDescriptorSetBinding { + const char *name; + VkDescriptorType type; + const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */ + const char *mem_quali; /* readonly, writeonly, etc. */ + const char *buf_content; /* For buffers */ + uint32_t dimensions; /* Needed for e.g. sampler%iD */ + uint32_t elems; /* 0 - scalar, 1 or more - vector */ + VkShaderStageFlags stages; + const VkSampler *samplers; /* Immutable samplers, length - #elems */ + void *updater; /* Pointer to VkDescriptor*Info */ +} VulkanDescriptorSetBinding; + +typedef struct FFVkBuffer { + VkBuffer buf; + VkDeviceMemory mem; + VkMemoryPropertyFlagBits flags; +} FFVkBuffer; + +typedef struct VulkanPipeline { + VkPipelineBindPoint bind_point; + + /* Contexts */ + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + + /* Shaders */ + SPIRVShader **shaders; + int shaders_num; + + /* Push consts */ + VkPushConstantRange *push_consts; + int push_consts_num; + + /* Descriptors */ + VkDescriptorSetLayout *desc_layout; + VkDescriptorPool desc_pool; + VkDescriptorSet *desc_set; + VkDescriptorUpdateTemplate *desc_template; + int descriptor_sets_num; + int pool_size_desc_num; + + /* Temporary, used to store data in between initialization stages */ + VkDescriptorUpdateTemplateCreateInfo *desc_template_info; + VkDescriptorPoolSize *pool_size_desc; +} VulkanPipeline; + +typedef struct FFVkExecContext { + VkCommandPool pool; + VkCommandBuffer buf; + VkQueue queue; + VkFence fence; + + VulkanPipeline *bound_pl; + + VkSemaphore *sem_wait; + int sem_wait_alloc; /* Allocated sem_wait */ + int sem_wait_cnt; + + VkPipelineStageFlagBits *sem_wait_dst; + int sem_wait_dst_alloc; /* Allocated sem_wait_dst */ + + VkSemaphore *sem_sig; + int sem_sig_alloc; /* Allocated sem_sig */ + int sem_sig_cnt; +} FFVkExecContext; + +typedef struct VulkanFilterContext { + const AVClass *class; + + AVBufferRef *device_ref; + AVBufferRef *frames_ref; /* For in-place filtering */ + AVHWDeviceContext *device; + AVVulkanDeviceContext *hwctx; + + /* Properties */ + int output_width; + int output_height; + enum AVPixelFormat output_format; + enum AVPixelFormat input_format; + + /* Samplers */ + VkSampler **samplers; + int samplers_num; + + /* Exec contexts */ + FFVkExecContext **exec_ctx; + int exec_ctx_num; + + /* Pipelines (each can have 1 shader of each type) */ + VulkanPipeline **pipelines; + int pipelines_num; + + void *scratch; /* Scratch memory used only in functions */ + unsigned int scratch_size; +} VulkanFilterContext; + +/* Identity mapping - r = r, b = b, g = g, a = a */ +extern const VkComponentMapping ff_comp_identity_map; + +/** + * General lavfi IO functions + */ +int ff_vk_filter_query_formats (AVFilterContext *avctx); +int ff_vk_filter_init (AVFilterContext *avctx); +int ff_vk_filter_config_input (AVFilterLink *inlink); +int ff_vk_filter_config_output (AVFilterLink *outlink); +int ff_vk_filter_config_output_inplace(AVFilterLink *outlink); +void ff_vk_filter_uninit (AVFilterContext *avctx); + +/** + * Converts Vulkan return values to strings + */ +const char *ff_vk_ret2str(VkResult res); + +/** + * Gets the glsl format string for a pixel format + */ +const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); + +/** + * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit() + */ +VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, + VkFilter filt); + +/** + * Create an imageview. + */ +int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img, + VkFormat fmt, const VkComponentMapping map); + +/** + * Destroy an imageview. Command buffer must have completed executing, which + * ff_vk_submit_exec_queue() will ensure + */ +void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v); + +/** + * Define a push constant for a given stage into a pipeline. + * Must be called before the pipeline layout has been initialized. + */ +int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl, + int offset, int size, VkShaderStageFlagBits stage); + +/** + * Inits a pipeline. Everything in it will be auto-freed when calling + * ff_vk_filter_uninit(). + */ +VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx); + +/** + * Inits a shader for a specific pipeline. Will be auto-freed on uninit. + */ +SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl, + const char *name, VkShaderStageFlags stage); + +/** + * Writes the workgroup size for a shader. + */ +void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd, + int local_size[3]); + +/** + * Adds a descriptor set to the shader and registers them in the pipeline. + */ +int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, + SPIRVShader *shd, VulkanDescriptorSetBinding *desc, + int num, int only_print_to_shader); + +/** + * Compiles the shader, entrypoint must be set to "main". + */ +int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd, + const char *entrypoint); + +/** + * Initializes the pipeline layout after all shaders and descriptor sets have + * been finished. + */ +int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl); + +/** + * Initializes a compute pipeline. Will pick the first shader with the + * COMPUTE flag set. + */ +int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl); + +/** + * Updates a descriptor set via the updaters defined. + * Can be called immediately after pipeline creation, but must be called + * at least once before queue submission. + */ +void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, + int set_id); + +/** + * Init an execution context for command recording and queue submission. + * WIll be auto-freed on uninit. + */ +int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue); + +/** + * Begin recording to the command buffer. Previous execution must have been + * completed, which ff_vk_submit_exec_queue() will ensure. + */ +int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e); + +/** + * Add a command to bind the completed pipeline and its descriptor sets. + * Must be called after ff_vk_start_exec_recording() and before submission. + */ +void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e, + VulkanPipeline *pl); + +/** + * Updates push constants. + * Must be called after binding a pipeline if any push constants were defined. + */ +void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e, + VkShaderStageFlagBits stage, int offset, + size_t size, void *src); + +/** + * Adds a frame as a queue dependency. This manages semaphore signalling. + * Must be called before submission. + */ +int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e, + AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag); + +/** + * Submits a command buffer to the queue for execution. + * Will block until execution has finished in order to simplify resource + * management. + */ +int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e); + +/** + * Create a VkBuffer with the specified parameters. + */ +int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); + +/** + * Maps the buffer to userspace. Set invalidate to 1 if reading the contents + * is necessary. + */ +int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[], + int nb_buffers, int invalidate); + +/** + * Unmaps the buffer from userspace. Set flush to 1 to write and sync. + */ +int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers, + int flush); + +/** + * Frees a buffer. + */ +void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf); + +#endif /* AVFILTER_VULKAN_COMMON_H */ diff --git a/libavutil/Makefile b/libavutil/Makefile index 57e6e3d7e8544..b189f9abea381 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -43,6 +43,7 @@ HEADERS = adler32.h \ hwcontext_vaapi.h \ hwcontext_videotoolbox.h \ hwcontext_vdpau.h \ + hwcontext_vulkan.h \ imgutils.h \ intfloat.h \ intreadwrite.h \ @@ -175,6 +176,7 @@ OBJS-$(CONFIG_QSV) += hwcontext_qsv.o OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o +OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o OBJS += $(COMPAT_OBJS:%=../compat/%) @@ -191,6 +193,7 @@ SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h +SKIPHEADERS-$(CONFIG_VULKAN) += hwcontext_vulkan.h TESTPROGS = adler32 \ aes \ diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c index f1e404ab2015d..d09a15a249156 100644 --- a/libavutil/hwcontext.c +++ b/libavutil/hwcontext.c @@ -58,6 +58,9 @@ static const HWContextType * const hw_table[] = { #endif #if CONFIG_MEDIACODEC &ff_hwcontext_type_mediacodec, +#endif +#if CONFIG_VULKAN + &ff_hwcontext_type_vulkan, #endif NULL, }; @@ -73,6 +76,7 @@ static const char *const hw_type_names[] = { [AV_HWDEVICE_TYPE_VDPAU] = "vdpau", [AV_HWDEVICE_TYPE_VIDEOTOOLBOX] = "videotoolbox", [AV_HWDEVICE_TYPE_MEDIACODEC] = "mediacodec", + [AV_HWDEVICE_TYPE_VULKAN] = "vulkan", }; enum AVHWDeviceType av_hwdevice_find_type_by_name(const char *name) @@ -444,21 +448,54 @@ int av_hwframe_transfer_data(AVFrame *dst, const AVFrame *src, int flags) if (!dst->buf[0]) return transfer_data_alloc(dst, src, flags); - if (src->hw_frames_ctx) { - ctx = (AVHWFramesContext*)src->hw_frames_ctx->data; + /* + * Hardware -> Hardware Transfer. + * Unlike Software -> Hardware or Hardware -> Software, the transfer + * function could be provided by either the src or dst, depending on + * the specific combination of hardware. + */ + if (src->hw_frames_ctx && dst->hw_frames_ctx) { + AVHWFramesContext *src_ctx = + (AVHWFramesContext*)src->hw_frames_ctx->data; + AVHWFramesContext *dst_ctx = + (AVHWFramesContext*)dst->hw_frames_ctx->data; + + if (src_ctx->internal->source_frames) { + av_log(src_ctx, AV_LOG_ERROR, + "A device with a derived frame context cannot be used as " + "the source of a HW -> HW transfer."); + return AVERROR(ENOSYS); + } - ret = ctx->internal->hw_type->transfer_data_from(ctx, dst, src); - if (ret < 0) - return ret; - } else if (dst->hw_frames_ctx) { - ctx = (AVHWFramesContext*)dst->hw_frames_ctx->data; + if (dst_ctx->internal->source_frames) { + av_log(src_ctx, AV_LOG_ERROR, + "A device with a derived frame context cannot be used as " + "the destination of a HW -> HW transfer."); + return AVERROR(ENOSYS); + } - ret = ctx->internal->hw_type->transfer_data_to(ctx, dst, src); + ret = src_ctx->internal->hw_type->transfer_data_from(src_ctx, dst, src); + if (ret == AVERROR(ENOSYS)) + ret = dst_ctx->internal->hw_type->transfer_data_to(dst_ctx, dst, src); if (ret < 0) return ret; - } else - return AVERROR(ENOSYS); + } else { + if (src->hw_frames_ctx) { + ctx = (AVHWFramesContext*)src->hw_frames_ctx->data; + + ret = ctx->internal->hw_type->transfer_data_from(ctx, dst, src); + if (ret < 0) + return ret; + } else if (dst->hw_frames_ctx) { + ctx = (AVHWFramesContext*)dst->hw_frames_ctx->data; + ret = ctx->internal->hw_type->transfer_data_to(ctx, dst, src); + if (ret < 0) + return ret; + } else { + return AVERROR(ENOSYS); + } + } return 0; } diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h index f5a4b62387747..f874af9f8fc1d 100644 --- a/libavutil/hwcontext.h +++ b/libavutil/hwcontext.h @@ -36,6 +36,7 @@ enum AVHWDeviceType { AV_HWDEVICE_TYPE_DRM, AV_HWDEVICE_TYPE_OPENCL, AV_HWDEVICE_TYPE_MEDIACODEC, + AV_HWDEVICE_TYPE_VULKAN, }; typedef struct AVHWDeviceInternal AVHWDeviceInternal; diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index 30611b1912051..53142edd0a169 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -21,6 +21,9 @@ #include "hwcontext.h" #include "hwcontext_internal.h" #include "hwcontext_cuda_internal.h" +#if CONFIG_VULKAN +#include "hwcontext_vulkan.h" +#endif #include "cuda_check.h" #include "mem.h" #include "pixdesc.h" @@ -42,6 +45,9 @@ static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_YUV444P16, AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, +#if CONFIG_VULKAN + AV_PIX_FMT_VULKAN, +#endif }; #define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x) @@ -205,6 +211,10 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, CUcontext dummy; int i, ret; + /* We don't support transfers to HW devices. */ + if (dst->hw_frames_ctx) + return AVERROR(ENOSYS); + ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); if (ret < 0) return ret; @@ -247,6 +257,10 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, CUcontext dummy; int i, ret; + /* We don't support transfers from HW devices. */ + if (src->hw_frames_ctx) + return AVERROR(ENOSYS); + ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); if (ret < 0) return ret; @@ -322,18 +336,64 @@ static int cuda_device_init(AVHWDeviceContext *ctx) return ret; } -static int cuda_device_create(AVHWDeviceContext *device_ctx, - const char *device, - AVDictionary *opts, int flags) -{ +static int cuda_context_init(AVHWDeviceContext *device_ctx, int flags) { AVCUDADeviceContext *hwctx = device_ctx->hwctx; CudaFunctions *cu; CUcontext dummy; - int ret, dev_active = 0, device_idx = 0; + int ret, dev_active = 0; unsigned int dev_flags = 0; const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC; + cu = hwctx->internal->cuda_dl; + + hwctx->internal->flags = flags; + + if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) { + ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, + &dev_flags, &dev_active)); + if (ret < 0) + return ret; + + if (dev_active && dev_flags != desired_flags) { + av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n"); + return AVERROR(ENOTSUP); + } else if (dev_flags != desired_flags) { + ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, + desired_flags)); + if (ret < 0) + return ret; + } + + ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, + hwctx->internal->cuda_device)); + if (ret < 0) + return ret; + } else { + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, + hwctx->internal->cuda_device)); + if (ret < 0) + return ret; + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + } + + hwctx->internal->is_allocated = 1; + + // Setting stream to NULL will make functions automatically use the default CUstream + hwctx->stream = NULL; + + return 0; +} + +static int cuda_device_create(AVHWDeviceContext *device_ctx, + const char *device, + AVDictionary *opts, int flags) +{ + AVCUDADeviceContext *hwctx = device_ctx->hwctx; + CudaFunctions *cu; + int ret, device_idx = 0; + if (device) device_idx = strtol(device, NULL, 0); @@ -350,37 +410,83 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx, if (ret < 0) goto error; - hwctx->internal->flags = flags; + ret = cuda_context_init(device_ctx, flags); + if (ret < 0) + goto error; - if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) { - ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, &dev_flags, &dev_active)); - if (ret < 0) - goto error; + return 0; - if (dev_active && dev_flags != desired_flags) { - av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n"); - goto error; - } else if (dev_flags != desired_flags) { - ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, desired_flags)); - if (ret < 0) - goto error; - } +error: + cuda_device_uninit(device_ctx); + return AVERROR_UNKNOWN; +} + +static int cuda_device_derive(AVHWDeviceContext *device_ctx, + AVHWDeviceContext *src_ctx, + int flags) { + AVCUDADeviceContext *hwctx = device_ctx->hwctx; + CudaFunctions *cu; + const char *src_uuid = NULL; + int ret, i, device_count; + + switch (src_ctx->type) { +#if CONFIG_VULKAN + case AV_HWDEVICE_TYPE_VULKAN: { + AVVulkanDeviceContext *vkctx = src_ctx->hwctx; + src_uuid = vkctx->device_uuid; + break; + } +#endif + default: + return AVERROR(ENOSYS); + } + + if (!src_uuid) { + av_log(device_ctx, AV_LOG_ERROR, + "Failed to get UUID of source device.\n"); + goto error; + } - ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device)); + if (cuda_device_init(device_ctx) < 0) + goto error; + + cu = hwctx->internal->cuda_dl; + + ret = CHECK_CU(cu->cuInit(0)); + if (ret < 0) + goto error; + + ret = CHECK_CU(cu->cuDeviceGetCount(&device_count)); + if (ret < 0) + goto error; + + hwctx->internal->cuda_device = -1; + for (i = 0; i < device_count; i++) { + CUdevice dev; + CUuuid uuid; + + ret = CHECK_CU(cu->cuDeviceGet(&dev, i)); if (ret < 0) goto error; - } else { - ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, hwctx->internal->cuda_device)); + + ret = CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev)); if (ret < 0) goto error; - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + if (memcmp(src_uuid, uuid.bytes, sizeof (uuid.bytes)) == 0) { + hwctx->internal->cuda_device = dev; + break; + } } - hwctx->internal->is_allocated = 1; + if (hwctx->internal->cuda_device == -1) { + av_log(device_ctx, AV_LOG_ERROR, "Could not derive CUDA device.\n"); + goto error; + } - // Setting stream to NULL will make functions automatically use the default CUstream - hwctx->stream = NULL; + ret = cuda_context_init(device_ctx, flags); + if (ret < 0) + goto error; return 0; @@ -397,6 +503,7 @@ const HWContextType ff_hwcontext_type_cuda = { .frames_priv_size = sizeof(CUDAFramesContext), .device_create = cuda_device_create, + .device_derive = cuda_device_derive, .device_init = cuda_device_init, .device_uninit = cuda_device_uninit, .frames_get_constraints = cuda_frames_get_constraints, diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h index 77dc47ddd6e64..dba0f39944ced 100644 --- a/libavutil/hwcontext_internal.h +++ b/libavutil/hwcontext_internal.h @@ -172,5 +172,6 @@ extern const HWContextType ff_hwcontext_type_vaapi; extern const HWContextType ff_hwcontext_type_vdpau; extern const HWContextType ff_hwcontext_type_videotoolbox; extern const HWContextType ff_hwcontext_type_mediacodec; +extern const HWContextType ff_hwcontext_type_vulkan; #endif /* AVUTIL_HWCONTEXT_INTERNAL_H */ diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c new file mode 100644 index 0000000000000..77c33cfda863e --- /dev/null +++ b/libavutil/hwcontext_vulkan.c @@ -0,0 +1,2803 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "pixdesc.h" +#include "avstring.h" +#include "imgutils.h" +#include "hwcontext.h" +#include "hwcontext_internal.h" +#include "hwcontext_vulkan.h" + +#if CONFIG_LIBDRM +#include +#include +#include +#include "hwcontext_drm.h" +#if CONFIG_VAAPI +#include +#include "hwcontext_vaapi.h" +#endif +#endif + +#if CONFIG_CUDA +#include "hwcontext_cuda_internal.h" +#include "cuda_check.h" +#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) +#endif + +typedef struct VulkanExecCtx { + VkCommandPool pool; + VkCommandBuffer buf; + VkQueue queue; + VkFence fence; +} VulkanExecCtx; + +typedef struct VulkanDevicePriv { + /* Properties */ + VkPhysicalDeviceProperties props; + VkPhysicalDeviceMemoryProperties mprops; + + /* Debug callback */ + VkDebugUtilsMessengerEXT debug_ctx; + + /* Image uploading */ + VulkanExecCtx cmd; + + /* Extensions */ + uint64_t extensions; + + /* Settings */ + int use_linear_images; +} VulkanDevicePriv; + +typedef struct AVVkFrameInternal { +#if CONFIG_CUDA + /* Importing external memory into cuda is really expensive so we keep the + * memory imported all the time */ + AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */ + CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS]; + CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS]; + CUarray cu_array[AV_NUM_DATA_POINTERS]; + CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS]; +#endif +} AVVkFrameInternal; + +#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \ + vkGetInstanceProcAddr(inst, #name) + +#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \ + VK_IMAGE_USAGE_STORAGE_BIT | \ + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \ + VK_IMAGE_USAGE_TRANSFER_DST_BIT) + +#define ADD_VAL_TO_LIST(list, count, val) \ + do { \ + list = av_realloc_array(list, sizeof(*list), ++count); \ + if (!list) { \ + err = AVERROR(ENOMEM); \ + goto end; \ + } \ + list[count - 1] = val; \ + } while(0) + +static const struct { + enum AVPixelFormat pixfmt; + const VkFormat vkfmts[3]; +} vk_pixfmt_map[] = { + { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } }, + { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } }, + + { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { AV_PIX_FMT_P010, { VK_FORMAT_R10X6_UNORM_PACK16, VK_FORMAT_R10X6G10X6_UNORM_2PACK16 } }, + { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { AV_PIX_FMT_NV16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + + { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + { AV_PIX_FMT_ABGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, + { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } }, + { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } }, + { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } }, + { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } }, + { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } }, + { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } }, + { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } }, + { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } }, + { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } }, + { AV_PIX_FMT_0BGR, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, + { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } }, + + { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, +}; + +const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p) +{ + for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++) + if (vk_pixfmt_map[i].pixfmt == p) + return vk_pixfmt_map[i].vkfmts; + return NULL; +} + +static int pixfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p, + int linear) +{ + const VkFormat *fmt = av_vkfmt_from_pixfmt(p); + int planes = av_pix_fmt_count_planes(p); + + if (!fmt) + return 0; + + for (int i = 0; i < planes; i++) { + VkFormatFeatureFlags flags; + VkFormatProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + }; + vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop); + flags = linear ? prop.formatProperties.linearTilingFeatures : + prop.formatProperties.optimalTilingFeatures; + if (!(flags & DEFAULT_USAGE_FLAGS)) + return 0; + } + + return 1; +} + +enum VulkanExtensions { + EXT_EXTERNAL_DMABUF_MEMORY = 1ULL << 0, /* VK_EXT_external_memory_dma_buf */ + EXT_DRM_MODIFIER_FLAGS = 1ULL << 1, /* VK_EXT_image_drm_format_modifier */ + EXT_EXTERNAL_FD_MEMORY = 1ULL << 2, /* VK_KHR_external_memory_fd */ + EXT_EXTERNAL_FD_SEM = 1ULL << 3, /* VK_KHR_external_semaphore_fd */ + + EXT_OPTIONAL = 1ULL << 62, + EXT_REQUIRED = 1ULL << 63, +}; + +typedef struct VulkanOptExtension { + const char *name; + uint64_t flag; +} VulkanOptExtension; + +static const VulkanOptExtension optional_instance_exts[] = { + /* For future use */ +}; + +static const VulkanOptExtension optional_device_exts[] = { + { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, EXT_EXTERNAL_FD_MEMORY, }, + { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, EXT_EXTERNAL_DMABUF_MEMORY, }, + { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, EXT_DRM_MODIFIER_FLAGS, }, + { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, EXT_EXTERNAL_FD_SEM, }, +}; + +/* Converts return values to strings */ +static const char *vk_ret2str(VkResult res) +{ +#define CASE(VAL) case VAL: return #VAL + switch (res) { + CASE(VK_SUCCESS); + CASE(VK_NOT_READY); + CASE(VK_TIMEOUT); + CASE(VK_EVENT_SET); + CASE(VK_EVENT_RESET); + CASE(VK_INCOMPLETE); + CASE(VK_ERROR_OUT_OF_HOST_MEMORY); + CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); + CASE(VK_ERROR_INITIALIZATION_FAILED); + CASE(VK_ERROR_DEVICE_LOST); + CASE(VK_ERROR_MEMORY_MAP_FAILED); + CASE(VK_ERROR_LAYER_NOT_PRESENT); + CASE(VK_ERROR_EXTENSION_NOT_PRESENT); + CASE(VK_ERROR_FEATURE_NOT_PRESENT); + CASE(VK_ERROR_INCOMPATIBLE_DRIVER); + CASE(VK_ERROR_TOO_MANY_OBJECTS); + CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); + CASE(VK_ERROR_FRAGMENTED_POOL); + CASE(VK_ERROR_SURFACE_LOST_KHR); + CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); + CASE(VK_SUBOPTIMAL_KHR); + CASE(VK_ERROR_OUT_OF_DATE_KHR); + CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); + CASE(VK_ERROR_VALIDATION_FAILED_EXT); + CASE(VK_ERROR_INVALID_SHADER_NV); + CASE(VK_ERROR_OUT_OF_POOL_MEMORY); + CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); + CASE(VK_ERROR_NOT_PERMITTED_EXT); + CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); + CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT); + CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT); + default: return "Unknown error"; + } +#undef CASE +} + +static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT *data, + void *priv) +{ + int l; + AVHWDeviceContext *ctx = priv; + + switch (severity) { + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l = AV_LOG_INFO; break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break; + case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l = AV_LOG_ERROR; break; + default: l = AV_LOG_DEBUG; break; + } + + av_log(ctx, l, "%s\n", data->pMessage); + for (int i = 0; i < data->cmdBufLabelCount; i++) + av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName); + + return 0; +} + +static int check_extensions(AVHWDeviceContext *ctx, int dev, + const char * const **dst, uint32_t *num, int debug) +{ + const char *tstr; + const char **extension_names = NULL; + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + int err = 0, found, extensions_found = 0; + + const char *mod; + int optional_exts_num; + uint32_t sup_ext_count; + VkExtensionProperties *sup_ext; + const VulkanOptExtension *optional_exts; + + if (!dev) { + mod = "instance"; + optional_exts = optional_instance_exts; + optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts); + vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL); + sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties)); + if (!sup_ext) + return AVERROR(ENOMEM); + vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext); + } else { + mod = "device"; + optional_exts = optional_device_exts; + optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts); + vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, + &sup_ext_count, NULL); + sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties)); + if (!sup_ext) + return AVERROR(ENOMEM); + vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, + &sup_ext_count, sup_ext); + } + + for (int i = 0; i < optional_exts_num; i++) { + int req = optional_exts[i].flag & EXT_REQUIRED; + tstr = optional_exts[i].name; + + found = 0; + for (int j = 0; j < sup_ext_count; j++) { + if (!strcmp(tstr, sup_ext[j].extensionName)) { + found = 1; + break; + } + } + if (!found) { + int lvl = req ? AV_LOG_ERROR : AV_LOG_VERBOSE; + av_log(ctx, lvl, "Extension \"%s\" not found!\n", tstr); + if (req) { + err = AVERROR(EINVAL); + goto end; + } + continue; + } + if (!req) + p->extensions |= optional_exts[i].flag; + + av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr); + + ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); + } + + if (debug && !dev) { + tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; + found = 0; + for (int j = 0; j < sup_ext_count; j++) { + if (!strcmp(tstr, sup_ext[j].extensionName)) { + found = 1; + break; + } + } + if (found) { + ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); + } else { + av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n", + tstr); + err = AVERROR(EINVAL); + goto end; + } + } + + *dst = extension_names; + *num = extensions_found; + +end: + av_free(sup_ext); + return err; +} + +/* Creates a VkInstance */ +static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) +{ + int err = 0; + VkResult ret; + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0); + const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10); + VkApplicationInfo application_info = { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pEngineName = "libavutil", + .apiVersion = VK_API_VERSION_1_1, + .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, + LIBAVUTIL_VERSION_MINOR, + LIBAVUTIL_VERSION_MICRO), + }; + VkInstanceCreateInfo inst_props = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pApplicationInfo = &application_info, + }; + + /* Check for present/missing extensions */ + err = check_extensions(ctx, 0, &inst_props.ppEnabledExtensionNames, + &inst_props.enabledExtensionCount, debug_mode); + if (err < 0) + return err; + + if (debug_mode) { + static const char *layers[] = { "VK_LAYER_LUNARG_standard_validation" }; + inst_props.ppEnabledLayerNames = layers; + inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers); + } + + /* Try to create the instance */ + ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst); + + /* Free used memory */ + av_free((void *)inst_props.ppEnabledExtensionNames); + + /* Check for errors */ + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (debug_mode) { + VkDebugUtilsMessengerCreateInfoEXT dbg = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = vk_dbg_callback, + .pUserData = ctx, + }; + VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT); + + pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg, + hwctx->alloc, &p->debug_ctx); + } + + return 0; +} + +typedef struct VulkanDeviceSelection { + uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */ + int has_uuid; + const char *name; /* Will use this second unless NULL */ + uint32_t pci_device; /* Will use this second unless 0x0 */ + uint32_t vendor_id; /* Last resort to find something deterministic */ + int index; /* Finally fall back to index */ +} VulkanDeviceSelection; + +static const char *vk_dev_type(enum VkPhysicalDeviceType type) +{ + switch (type) { + case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated"; + case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: return "discrete"; + case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: return "virtual"; + case VK_PHYSICAL_DEVICE_TYPE_CPU: return "software"; + default: return "unknown"; + } +} + +/* Finds a device */ +static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) +{ + int err = 0, choice = -1; + uint32_t num; + VkResult ret; + VkPhysicalDevice *devices = NULL; + VkPhysicalDeviceIDProperties *idp = NULL; + VkPhysicalDeviceProperties2 *prop = NULL; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + + ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL); + if (ret != VK_SUCCESS || !num) { + av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret)); + return AVERROR(ENODEV); + } + + devices = av_malloc_array(num, sizeof(VkPhysicalDevice)); + if (!devices) + return AVERROR(ENOMEM); + + ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n", + vk_ret2str(ret)); + err = AVERROR(ENODEV); + goto end; + } + + prop = av_mallocz_array(num, sizeof(*prop)); + if (!prop) { + err = AVERROR(ENOMEM); + goto end; + } + + idp = av_mallocz_array(num, sizeof(*idp)); + if (!idp) { + err = AVERROR(ENOMEM); + goto end; + } + + av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n"); + for (int i = 0; i < num; i++) { + idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; + prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + prop[i].pNext = &idp[i]; + + vkGetPhysicalDeviceProperties2(devices[i], &prop[i]); + av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i, + prop[i].properties.deviceName, + vk_dev_type(prop[i].properties.deviceType), + prop[i].properties.deviceID); + } + + if (select->has_uuid) { + for (int i = 0; i < num; i++) { + if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n"); + err = AVERROR(ENODEV); + goto end; + } else if (select->name) { + av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name); + for (int i = 0; i < num; i++) { + if (strstr(prop[i].properties.deviceName, select->name)) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n", + select->name); + err = AVERROR(ENODEV); + goto end; + } else if (select->pci_device) { + av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device); + for (int i = 0; i < num; i++) { + if (select->pci_device == prop[i].properties.deviceID) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n", + select->pci_device); + err = AVERROR(EINVAL); + goto end; + } else if (select->vendor_id) { + av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id); + for (int i = 0; i < num; i++) { + if (select->vendor_id == prop[i].properties.vendorID) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n", + select->vendor_id); + err = AVERROR(ENODEV); + goto end; + } else { + if (select->index < num) { + choice = select->index; + goto end; + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n", + select->index); + err = AVERROR(ENODEV); + goto end; + } + +end: + if (choice > -1) { + hwctx->phys_dev = devices[choice]; + memcpy(hwctx->device_uuid, idp[choice].deviceUUID, VK_UUID_SIZE); + } + av_free(devices); + av_free(prop); + av_free(idp); + + return err; +} + +static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) +{ + uint32_t num; + VkQueueFamilyProperties *qs = NULL; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + int graph_index = -1, comp_index = -1, tx_index = -1; + VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos; + + /* First get the number of queue families */ + vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL); + if (!num) { + av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); + return AVERROR_EXTERNAL; + } + + /* Then allocate memory */ + qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties)); + if (!qs) + return AVERROR(ENOMEM); + + /* Finally retrieve the queue families */ + vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs); + +#define SEARCH_FLAGS(expr, out) \ + for (int i = 0; i < num; i++) { \ + const VkQueueFlagBits flags = qs[i].queueFlags; \ + if (expr) { \ + out = i; \ + break; \ + } \ + } + + SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index) + + SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index), + comp_index) + + SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) && + (i != comp_index), tx_index) + +#undef SEARCH_FLAGS +#define QF_FLAGS(flags) \ + ((flags) & VK_QUEUE_GRAPHICS_BIT ) ? "(graphics) " : "", \ + ((flags) & VK_QUEUE_COMPUTE_BIT ) ? "(compute) " : "", \ + ((flags) & VK_QUEUE_TRANSFER_BIT ) ? "(transfer) " : "", \ + ((flags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : "" + + av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for graphics, " + "flags: %s%s%s%s\n", graph_index, QF_FLAGS(qs[graph_index].queueFlags)); + + hwctx->queue_family_index = graph_index; + hwctx->queue_family_tx_index = graph_index; + hwctx->queue_family_comp_index = graph_index; + + pc[cd->queueCreateInfoCount++].queueFamilyIndex = graph_index; + + if (comp_index != -1) { + av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for compute, " + "flags: %s%s%s%s\n", comp_index, QF_FLAGS(qs[comp_index].queueFlags)); + hwctx->queue_family_tx_index = comp_index; + hwctx->queue_family_comp_index = comp_index; + pc[cd->queueCreateInfoCount++].queueFamilyIndex = comp_index; + } + + if (tx_index != -1) { + av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for transfers, " + "flags: %s%s%s%s\n", tx_index, QF_FLAGS(qs[tx_index].queueFlags)); + hwctx->queue_family_tx_index = tx_index; + pc[cd->queueCreateInfoCount++].queueFamilyIndex = tx_index; + } + +#undef QF_FLAGS + + av_free(qs); + + return 0; +} + +static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd, + int queue_family_index) +{ + VkResult ret; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + + VkCommandPoolCreateInfo cqueue_create = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = queue_family_index, + }; + VkCommandBufferAllocateInfo cbuf_create = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }; + + VkFenceCreateInfo fence_spawn = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + }; + + ret = vkCreateFence(hwctx->act_dev, &fence_spawn, + hwctx->alloc, &cmd->fence); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create, + hwctx->alloc, &cmd->pool); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + cbuf_create.commandPool = cmd->pool; + + ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &cmd->buf); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + vkGetDeviceQueue(hwctx->act_dev, cqueue_create.queueFamilyIndex, 0, + &cmd->queue); + + return 0; +} + +static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd) +{ + AVVulkanDeviceContext *hwctx = ctx->hwctx; + + vkDestroyFence(hwctx->act_dev, cmd->fence, hwctx->alloc); + + if (cmd->buf) + vkFreeCommandBuffers(hwctx->act_dev, cmd->pool, 1, &cmd->buf); + if (cmd->pool) + vkDestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc); +} + +static void vulkan_device_free(AVHWDeviceContext *ctx) +{ + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + + free_exec_ctx(ctx, &p->cmd); + + vkDestroyDevice(hwctx->act_dev, hwctx->alloc); + + if (p->debug_ctx) { + VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT); + pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx, + hwctx->alloc); + } + + vkDestroyInstance(hwctx->inst, hwctx->alloc); +} + +static int vulkan_device_create_internal(AVHWDeviceContext *ctx, + VulkanDeviceSelection *dev_select, + AVDictionary *opts, int flags) +{ + int err = 0; + VkResult ret; + AVDictionaryEntry *opt_d; + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VkDeviceQueueCreateInfo queue_create_info[3] = { + { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pQueuePriorities = (float []){ 1.0f }, + .queueCount = 1, }, + { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pQueuePriorities = (float []){ 1.0f }, + .queueCount = 1, }, + { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pQueuePriorities = (float []){ 1.0f }, + .queueCount = 1, }, + }; + + VkDeviceCreateInfo dev_info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pQueueCreateInfos = queue_create_info, + .queueCreateInfoCount = 0, + }; + + ctx->free = vulkan_device_free; + + /* Create an instance if not given one */ + if ((err = create_instance(ctx, opts))) + goto end; + + /* Find a device (if not given one) */ + if ((err = find_device(ctx, dev_select))) + goto end; + + vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props); + av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName); + av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n"); + av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyOffsetAlignment: %li\n", + p->props.limits.optimalBufferCopyOffsetAlignment); + av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %li\n", + p->props.limits.optimalBufferCopyRowPitchAlignment); + av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %li\n", + p->props.limits.minMemoryMapAlignment); + + /* Search queue family */ + if ((err = search_queue_families(ctx, &dev_info))) + goto end; + + if ((err = check_extensions(ctx, 1, &dev_info.ppEnabledExtensionNames, + &dev_info.enabledExtensionCount, 0))) + goto end; + + ret = vkCreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc, + &hwctx->act_dev); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto end; + } + + av_free((void *)dev_info.ppEnabledExtensionNames); + + /* Tiled images setting, use them by default */ + opt_d = av_dict_get(opts, "linear_images", NULL, 0); + if (opt_d) + p->use_linear_images = strtol(opt_d->value, NULL, 10); + +end: + return err; +} + +static int vulkan_device_init(AVHWDeviceContext *ctx) +{ + int err; + uint32_t queue_num; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *p = ctx->internal->priv; + + vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL); + if (!queue_num) { + av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); + return AVERROR_EXTERNAL; + } + + if (hwctx->queue_family_index >= queue_num || + hwctx->queue_family_tx_index >= queue_num || + hwctx->queue_family_comp_index >= queue_num) { + av_log(ctx, AV_LOG_ERROR, "Invalid queue index!\n"); + return AVERROR_EXTERNAL; + } + + /* Create exec context - if there's something invalid this will error out */ + err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index); + if (err) + return err; + + /* Get device capabilities */ + vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); + + return 0; +} + +static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device, + AVDictionary *opts, int flags) +{ + VulkanDeviceSelection dev_select = { 0 }; + if (device && device[0]) { + char *end = NULL; + dev_select.index = strtol(device, &end, 10); + if (end == device) { + dev_select.index = 0; + dev_select.name = device; + } + } + + return vulkan_device_create_internal(ctx, &dev_select, opts, flags); +} + +static int vulkan_device_derive(AVHWDeviceContext *ctx, + AVHWDeviceContext *src_ctx, int flags) +{ + av_unused VulkanDeviceSelection dev_select = { 0 }; + + /* If there's only one device on the system, then even if its not covered + * by the following checks (e.g. non-PCIe ARM GPU), having an empty + * dev_select will mean it'll get picked. */ + switch(src_ctx->type) { +#if CONFIG_LIBDRM +#if CONFIG_VAAPI + case AV_HWDEVICE_TYPE_VAAPI: { + AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx; + + const char *vendor = vaQueryVendorString(src_hwctx->display); + if (!vendor) { + av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n"); + return AVERROR_EXTERNAL; + } + + if (strstr(vendor, "Intel")) + dev_select.vendor_id = 0x8086; + if (strstr(vendor, "AMD")) + dev_select.vendor_id = 0x1002; + + return vulkan_device_create_internal(ctx, &dev_select, NULL, flags); + } +#endif + case AV_HWDEVICE_TYPE_DRM: { + AVDRMDeviceContext *src_hwctx = src_ctx->hwctx; + + drmDevice *drm_dev_info; + int err = drmGetDevice(src_hwctx->fd, &drm_dev_info); + if (err) { + av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n"); + return AVERROR_EXTERNAL; + } + + if (drm_dev_info->bustype == DRM_BUS_PCI) + dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id; + + drmFreeDevice(&drm_dev_info); + + return vulkan_device_create_internal(ctx, &dev_select, NULL, flags); + } +#endif +#if CONFIG_CUDA + case AV_HWDEVICE_TYPE_CUDA: { + AVHWDeviceContext *cuda_cu = src_ctx; + AVCUDADeviceContext *src_hwctx = src_ctx->hwctx; + AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + + int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid, + cu_internal->cuda_device)); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n"); + return AVERROR_EXTERNAL; + } + + dev_select.has_uuid = 1; + + return vulkan_device_create_internal(ctx, &dev_select, NULL, flags); + } +#endif + default: + return AVERROR(ENOSYS); + } +} + +static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, + const void *hwconfig, + AVHWFramesConstraints *constraints) +{ + int count = 0; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *p = ctx->internal->priv; + + for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) + count += pixfmt_is_supported(hwctx, i, p->use_linear_images); + +#if CONFIG_CUDA + count++; +#endif + + constraints->valid_sw_formats = av_malloc_array(count + 1, + sizeof(enum AVPixelFormat)); + if (!constraints->valid_sw_formats) + return AVERROR(ENOMEM); + + count = 0; + for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) + if (pixfmt_is_supported(hwctx, i, p->use_linear_images)) + constraints->valid_sw_formats[count++] = i; + +#if CONFIG_CUDA + constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA; +#endif + constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE; + + constraints->min_width = 0; + constraints->min_height = 0; + constraints->max_width = p->props.limits.maxImageDimension2D; + constraints->max_height = p->props.limits.maxImageDimension2D; + + constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat)); + if (!constraints->valid_hw_formats) + return AVERROR(ENOMEM); + + constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN; + constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE; + + return 0; +} + +static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) +{ + VkResult ret; + int index = -1; + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *dev_hwctx = ctx->hwctx; + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = alloc_extension, + }; + + /* Align if we need to */ + if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment); + + alloc_info.allocationSize = req->size; + + /* The vulkan spec requires memory types to be sorted in the "optimal" + * order, so the first matching type we find will be the best/fastest one */ + for (int i = 0; i < p->mprops.memoryTypeCount; i++) { + /* The memory type must be supported by the requirements (bitfield) */ + if (!(req->memoryTypeBits & (1 << i))) + continue; + + /* The memory type flags must include our properties */ + if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) + continue; + + /* Found a suitable memory type */ + index = i; + break; + } + + if (index < 0) { + av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", + req_flags); + return AVERROR(EINVAL); + } + + alloc_info.memoryTypeIndex = index; + + ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info, + dev_hwctx->alloc, mem); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n", + vk_ret2str(ret)); + return AVERROR(ENOMEM); + } + + *mem_flags |= p->mprops.memoryTypes[index].propertyFlags; + + return 0; +} + +typedef struct VulkanFramesPriv { + VulkanExecCtx cmd; +} VulkanFramesPriv; + +static void vulkan_free_internal(AVVkFrameInternal *internal) +{ + if (!internal) + return; + +#if CONFIG_CUDA + if (internal->cuda_fc_ref) { + AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data; + int planes = av_pix_fmt_count_planes(cuda_fc->sw_format); + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + + for (int i = 0; i < planes; i++) { + if (internal->cu_sem[i]) + CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i])); + if (internal->cu_mma[i]) + CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i])); + if (internal->ext_mem[i]) + CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i])); + } + + av_buffer_unref(&internal->cuda_fc_ref); + } +#endif + + av_free(internal); +} + +static void vulkan_frame_free(void *opaque, uint8_t *data) +{ + AVVkFrame *f = (AVVkFrame *)data; + AVHWFramesContext *hwfc = opaque; + AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; + int planes = av_pix_fmt_count_planes(hwfc->sw_format); + + if (!f) + return; + + vulkan_free_internal(f->internal); + + for (int i = 0; i < planes; i++) { + vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); + } + + av_free(f); +} + +static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, + void *alloc_pnext, size_t alloc_pnext_stride) +{ + int err; + VkResult ret; + AVHWDeviceContext *ctx = hwfc->device_ctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } }; + + AVVulkanDeviceContext *hwctx = ctx->hwctx; + + for (int i = 0; i < planes; i++) { + int use_ded_mem; + VkImageMemoryRequirementsInfo2 req_desc = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, + .image = f->img[i], + }; + VkMemoryDedicatedAllocateInfo ded_alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride), + }; + VkMemoryDedicatedRequirements ded_req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, + }; + VkMemoryRequirements2 req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = &ded_req, + }; + + vkGetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req); + + /* In case the implementation prefers/requires dedicated allocation */ + use_ded_mem = ded_req.prefersDedicatedAllocation | + ded_req.requiresDedicatedAllocation; + if (use_ded_mem) + ded_alloc.image = f->img[i]; + + /* Allocate memory */ + if ((err = alloc_mem(ctx, &req.memoryRequirements, + f->tiling == VK_IMAGE_TILING_LINEAR ? + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, + &f->flags, &f->mem[i]))) + return err; + + f->size[i] = req.memoryRequirements.size; + bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; + bind_info[i].image = f->img[i]; + bind_info[i].memory = f->mem[i]; + } + + /* Bind the allocated memory to the images */ + ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static int prepare_frame(AVHWFramesContext *hwfc, AVVkFrame *frame) +{ + VkResult ret; + AVHWDeviceContext *ctx = hwfc->device_ctx; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanFramesPriv *s = hwfc->internal->priv; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + + VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; + + VkCommandBufferBeginInfo cmd_start = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + + VkSubmitInfo s_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &s->cmd.buf, + + .pSignalSemaphores = frame->sem, + .signalSemaphoreCount = planes, + }; + + ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + /* Change the image layout to something more optimal for writes */ + for (int i = 0; i < planes; i++) { + img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + img_bar[i].srcAccessMask = 0x0; + img_bar[i].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + img_bar[i].oldLayout = frame->layout[i]; + img_bar[i].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + img_bar[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + img_bar[i].image = frame->img[i]; + img_bar[i].subresourceRange.levelCount = 1; + img_bar[i].subresourceRange.layerCount = 1; + img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + + frame->layout[i] = img_bar[i].newLayout; + frame->access[i] = img_bar[i].dstAccessMask; + } + + vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + 0, NULL, 0, NULL, planes, img_bar); + + ret = vkEndCommandBuffer(s->cmd.buf); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence); + if (ret != VK_SUCCESS) { + return AVERROR_EXTERNAL; + } else { + vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX); + vkResetFences(hwctx->act_dev, 1, &s->cmd.fence); + } + + return 0; +} + +static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, + VkImageTiling tiling, VkImageUsageFlagBits usage, + void *create_pnext) +{ + int err; + VkResult ret; + AVHWDeviceContext *ctx = hwfc->device_ctx; + VulkanDevicePriv *p = ctx->internal->priv; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + enum AVPixelFormat format = hwfc->sw_format; + const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format); + const int planes = av_pix_fmt_count_planes(format); + + VkExportSemaphoreCreateInfo ext_sem_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, + .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + + VkSemaphoreCreateInfo sem_spawn = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = p->extensions & EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL, + }; + + AVVkFrame *f = av_mallocz(sizeof(*f)); + if (!f) { + av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n"); + err = AVERROR(ENOMEM); + goto fail; + } + + /* Create the images */ + for (int i = 0; i < planes; i++) { + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); + int w = hwfc->width; + int h = hwfc->height; + const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w; + const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h; + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = create_pnext, + .imageType = VK_IMAGE_TYPE_2D, + .format = img_fmts[i], + .extent.width = p_w, + .extent.height = p_h, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .flags = VK_IMAGE_CREATE_ALIAS_BIT, + .tiling = tiling, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .samples = VK_SAMPLE_COUNT_1_BIT, + }; + + ret = vkCreateImage(hwctx->act_dev, &image_create_info, + hwctx->alloc, &f->img[i]); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", + vk_ret2str(ret)); + err = AVERROR(EINVAL); + goto fail; + } + + /* Create semaphore */ + ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem[i]); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + f->layout[i] = image_create_info.initialLayout; + f->access[i] = 0x0; + } + + f->flags = 0x0; + f->tiling = tiling; + + *frame = f; + return 0; + +fail: + vulkan_frame_free(hwfc, (uint8_t *)f); + return err; +} + +/* Checks if an export flag is enabled, and if it is ORs it with *iexp */ +static void try_export_flags(AVHWFramesContext *hwfc, + VkExternalMemoryHandleTypeFlags *comp_handle_types, + VkExternalMemoryHandleTypeFlagBits *iexp, + VkExternalMemoryHandleTypeFlagBits exp) +{ + VkResult ret; + AVVulkanFramesContext *hwctx = hwfc->hwctx; + AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; + VkExternalImageFormatProperties eprops = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, + }; + VkImageFormatProperties2 props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, + .pNext = &eprops, + }; + VkPhysicalDeviceExternalImageFormatInfo enext = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, + .handleType = exp, + }; + VkPhysicalDeviceImageFormatInfo2 pinfo = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .pNext = !exp ? NULL : &enext, + .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0], + .type = VK_IMAGE_TYPE_2D, + .tiling = hwctx->tiling, + .usage = hwctx->usage, + .flags = VK_IMAGE_CREATE_ALIAS_BIT, + }; + + ret = vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev, + &pinfo, &props); + if (ret == VK_SUCCESS) { + *iexp |= exp; + *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes; + } +} + +static AVBufferRef *vulkan_pool_alloc(void *opaque, int size) +{ + int err; + AVVkFrame *f; + AVBufferRef *avbuf = NULL; + AVHWFramesContext *hwfc = opaque; + AVVulkanFramesContext *hwctx = hwfc->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS]; + VkExternalMemoryHandleTypeFlags e = 0x0; + + VkExternalMemoryImageCreateInfo eiinfo = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = hwctx->create_pnext, + }; + + if (p->extensions & EXT_EXTERNAL_FD_MEMORY) + try_export_flags(hwfc, &eiinfo.handleTypes, &e, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT); + + if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY) + try_export_flags(hwfc, &eiinfo.handleTypes, &e, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + + for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) { + eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + eminfo[i].pNext = hwctx->alloc_pnext[i]; + eminfo[i].handleTypes = e; + } + + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, + eiinfo.handleTypes ? &eiinfo : NULL); + if (err) + goto fail; + + err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo)); + if (err) + goto fail; + + err = prepare_frame(hwfc, f); + if (err) + goto fail; + + avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame), + vulkan_frame_free, hwfc, 0); + if (!avbuf) + goto fail; + + return avbuf; + +fail: + vulkan_frame_free(hwfc, (uint8_t *)f); + return NULL; +} + +static void vulkan_frames_uninit(AVHWFramesContext *hwfc) +{ + VulkanFramesPriv *fp = hwfc->internal->priv; + + free_exec_ctx(hwfc->device_ctx, &fp->cmd); +} + +static int vulkan_frames_init(AVHWFramesContext *hwfc) +{ + int err; + AVVkFrame *f; + AVVulkanFramesContext *hwctx = hwfc->hwctx; + VulkanFramesPriv *fp = hwfc->internal->priv; + AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + + if (hwfc->pool) + return 0; + + /* Default pool flags */ + hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + + hwctx->usage |= DEFAULT_USAGE_FLAGS; + + err = create_exec_ctx(hwfc->device_ctx, &fp->cmd, + dev_hwctx->queue_family_tx_index); + if (err) + return err; + + /* Test to see if allocation will fail */ + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, + hwctx->create_pnext); + if (err) + return err; + + vulkan_frame_free(hwfc, (uint8_t *)f); + + hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame), + hwfc, vulkan_pool_alloc, + NULL); + if (!hwfc->internal->pool_internal) + return AVERROR(ENOMEM); + + return 0; +} + +static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame) +{ + frame->buf[0] = av_buffer_pool_get(hwfc->pool); + if (!frame->buf[0]) + return AVERROR(ENOMEM); + + frame->data[0] = frame->buf[0]->data; + frame->format = AV_PIX_FMT_VULKAN; + frame->width = hwfc->width; + frame->height = hwfc->height; + + return 0; +} + +static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc, + enum AVHWFrameTransferDirection dir, + enum AVPixelFormat **formats) +{ + enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts)); + if (!fmts) + return AVERROR(ENOMEM); + + fmts[0] = hwfc->sw_format; + fmts[1] = AV_PIX_FMT_NONE; + + *formats = fmts; + return 0; +} + +typedef struct VulkanMapping { + AVVkFrame *frame; + int flags; +} VulkanMapping; + +static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) +{ + VulkanMapping *map = hwmap->priv; + AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + + /* Check if buffer needs flushing */ + if ((map->flags & AV_HWFRAME_MAP_WRITE) && + !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkResult ret; + VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } }; + + for (int i = 0; i < planes; i++) { + flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + flush_ranges[i].memory = map->frame->mem[i]; + flush_ranges[i].size = VK_WHOLE_SIZE; + } + + ret = vkFlushMappedMemoryRanges(hwctx->act_dev, planes, + flush_ranges); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n", + vk_ret2str(ret)); + } + } + + for (int i = 0; i < planes; i++) + vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]); + + av_free(map); +} + +static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + VkResult ret; + int err, mapped_mem_count = 0; + AVVkFrame *f = (AVVkFrame *)src->data[0]; + AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + + VulkanMapping *map = av_mallocz(sizeof(VulkanMapping)); + if (!map) + return AVERROR(EINVAL); + + if (src->format != AV_PIX_FMT_VULKAN) { + av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n", + av_get_pix_fmt_name(src->format)); + err = AVERROR(EINVAL); + goto fail; + } + + if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) || + !(f->tiling == VK_IMAGE_TILING_LINEAR)) { + av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible " + "and linear!\n"); + err = AVERROR(EINVAL); + goto fail; + } + + dst->width = src->width; + dst->height = src->height; + + for (int i = 0; i < planes; i++) { + ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0, + VK_WHOLE_SIZE, 0, (void **)&dst->data[i]); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + mapped_mem_count++; + } + + /* Check if the memory contents matter */ + if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) && + !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } }; + for (int i = 0; i < planes; i++) { + map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + map_mem_ranges[i].size = VK_WHOLE_SIZE; + map_mem_ranges[i].memory = f->mem[i]; + } + + ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, planes, + map_mem_ranges); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + } + + for (int i = 0; i < planes; i++) { + VkImageSubresource sub = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + }; + VkSubresourceLayout layout; + vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); + dst->linesize[i] = layout.rowPitch; + } + + map->frame = f; + map->flags = flags; + + err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, + &vulkan_unmap_frame, map); + if (err < 0) + goto fail; + + return 0; + +fail: + for (int i = 0; i < mapped_mem_count; i++) + vkUnmapMemory(hwctx->act_dev, f->mem[i]); + + av_free(map); + return err; +} + +#if CONFIG_LIBDRM +static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) +{ + VulkanMapping *map = hwmap->priv; + AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + + for (int i = 0; i < planes; i++) { + vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc); + vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc); + vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc); + } + + av_freep(&map->frame); +} + +static const struct { + uint32_t va_fourcc; + VkFormat vk_format; +} vulkan_drm_format_map[] = { + { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM }, + { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM }, + { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM }, + { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM }, + { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM }, + { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM }, + { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM }, + { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM }, + { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM }, + { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM }, +}; + +static inline VkFormat drm_to_vulkan_fmt(uint32_t va_fourcc) +{ + for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++) + if (vulkan_drm_format_map[i].va_fourcc == va_fourcc) + return vulkan_drm_format_map[i].vk_format; + return VK_FORMAT_UNDEFINED; +} + +static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame, + AVDRMFrameDescriptor *desc) +{ + int err = 0; + VkResult ret; + AVVkFrame *f; + AVHWDeviceContext *ctx = hwfc->device_ctx; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *p = ctx->internal->priv; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(hwfc->sw_format); + const int has_modifiers = p->extensions & EXT_DRM_MODIFIER_FLAGS; + VkSubresourceLayout plane_data[AV_NUM_DATA_POINTERS]; + VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS]; + VkExternalMemoryHandleTypeFlagBits htype = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + + VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdPropertiesKHR); + + for (int i = 0; i < desc->nb_layers; i++) { + if (desc->layers[i].nb_planes > 1) { + av_log(ctx, AV_LOG_ERROR, "Cannot import DMABUFS with more than 1 " + "plane per layer!\n"); + return AVERROR(EINVAL); + } + + if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) { + av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format!\n"); + return AVERROR(EINVAL); + } + } + + if (!(f = av_mallocz(sizeof(*f)))) { + av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n"); + err = AVERROR(ENOMEM); + goto fail; + } + + for (int i = 0; i < desc->nb_objects; i++) { + VkMemoryFdPropertiesKHR fdmp = { + .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR, + }; + VkMemoryRequirements req = { + .size = desc->objects[i].size, + }; + VkImportMemoryFdInfoKHR idesc = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, + .handleType = htype, + .fd = desc->objects[i].fd, + }; + + ret = pfn_vkGetMemoryFdPropertiesKHR(hwctx->act_dev, htype, + desc->objects[i].fd, &fdmp); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + req.memoryTypeBits = fdmp.memoryTypeBits; + + err = alloc_mem(ctx, &req, 0x0, &idesc, &f->flags, &f->mem[i]); + if (err) + return err; + + f->size[i] = desc->objects[i].size; + } + + f->tiling = has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : + desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ? + VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL; + + for (int i = 0; i < desc->nb_layers; i++) { + VkImageDrmFormatModifierExplicitCreateInfoEXT drm_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT, + .drmFormatModifier = desc->objects[0].format_modifier, + .drmFormatModifierPlaneCount = desc->layers[i].nb_planes, + .pPlaneLayouts = (const VkSubresourceLayout *)&plane_data, + }; + + VkExternalMemoryImageCreateInfo einfo = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = has_modifiers ? &drm_info : NULL, + .handleTypes = htype, + }; + + VkSemaphoreCreateInfo sem_spawn = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + }; + + const int p_w = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, fmt_desc->log2_chroma_w) : hwfc->width; + const int p_h = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, fmt_desc->log2_chroma_h) : hwfc->height; + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = &einfo, + .imageType = VK_IMAGE_TYPE_2D, + .format = drm_to_vulkan_fmt(desc->layers[i].format), + .extent.width = p_w, + .extent.height = p_h, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .flags = VK_IMAGE_CREATE_ALIAS_BIT, + .tiling = f->tiling, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */ + .usage = DEFAULT_USAGE_FLAGS, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .samples = VK_SAMPLE_COUNT_1_BIT, + }; + + for (int j = 0; j < desc->layers[i].nb_planes; j++) { + plane_data[j].offset = desc->layers[i].planes[j].offset; + plane_data[j].rowPitch = desc->layers[i].planes[j].pitch; + plane_data[j].size = 0; /* The specs say so for all 3 */ + plane_data[j].arrayPitch = 0; + plane_data[j].depthPitch = 0; + } + + /* Create image */ + ret = vkCreateImage(hwctx->act_dev, &image_create_info, + hwctx->alloc, &f->img[i]); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", + vk_ret2str(ret)); + err = AVERROR(EINVAL); + goto fail; + } + + ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem[i]); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + /* We'd import a semaphore onto the one we created using + * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI + * offer us anything we could import and sync with, so instead + * leave the semaphore unsignalled and enjoy the validation spam. */ + + f->layout[i] = image_create_info.initialLayout; + f->access[i] = 0x0; + + /* TODO: Fix to support more than 1 plane per layer */ + bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; + bind_info[i].pNext = NULL; + bind_info[i].image = f->img[i]; + bind_info[i].memory = f->mem[desc->layers[i].planes[0].object_index]; + bind_info[i].memoryOffset = desc->layers[i].planes[0].offset; + } + + /* Bind the allocated memory to the images */ + ret = vkBindImageMemory2(hwctx->act_dev, planes, bind_info); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + *frame = f; + + return 0; + +fail: + for (int i = 0; i < planes; i++) { + vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); + } + + av_free(f); + + return err; +} + +static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + int err = 0; + AVVkFrame *f; + VulkanMapping *map = NULL; + + err = vulkan_map_from_drm_frame_desc(hwfc, &f, + (AVDRMFrameDescriptor *)src->data[0]); + if (err) + goto fail; + + /* The unmapping function will free this */ + dst->data[0] = (uint8_t *)f; + dst->width = src->width; + dst->height = src->height; + + map = av_mallocz(sizeof(VulkanMapping)); + if (!map) + goto fail; + + map->frame = f; + map->flags = flags; + + err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, + &vulkan_unmap_from, map); + if (err < 0) + goto fail; + + av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n"); + + return 0; + +fail: + vulkan_frame_free(hwfc->device_ctx->hwctx, (uint8_t *)f); + av_free(map); + return err; +} + +#if CONFIG_VAAPI +static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc, + AVFrame *dst, const AVFrame *src, + int flags) +{ + int err; + AVFrame *tmp = av_frame_alloc(); + AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data; + AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx; + VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3]; + + if (!tmp) + return AVERROR(ENOMEM); + + /* We have to sync since like the previous comment said, no semaphores */ + vaSyncSurface(vaapi_ctx->display, surface_id); + + tmp->format = AV_PIX_FMT_DRM_PRIME; + + err = av_hwframe_map(tmp, src, flags); + if (err < 0) + goto fail; + + err = vulkan_map_from_drm(dst_fc, dst, tmp, flags); + if (err < 0) + goto fail; + + err = ff_hwframe_map_replace(dst, src); + +fail: + av_frame_free(&tmp); + return err; +} +#endif +#endif + +#if CONFIG_CUDA +static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, + AVBufferRef *cuda_hwfc, + const AVFrame *frame) +{ + int err; + VkResult ret; + AVVkFrame *dst_f; + AVVkFrameInternal *dst_int; + AVHWDeviceContext *ctx = hwfc->device_ctx; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR); + VK_LOAD_PFN(hwctx->inst, vkGetSemaphoreFdKHR); + + AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data; + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 : + CU_AD_FORMAT_UNSIGNED_INT8; + + dst_f = (AVVkFrame *)frame->data[0]; + + dst_int = dst_f->internal; + if (!dst_int || !dst_int->cuda_fc_ref) { + if (!dst_f->internal) + dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal)); + + if (!dst_int) { + err = AVERROR(ENOMEM); + goto fail; + } + + dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc); + if (!dst_int->cuda_fc_ref) { + err = AVERROR(ENOMEM); + goto fail; + } + + for (int i = 0; i < planes; i++) { + CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = { + .offset = 0, + .arrayDesc = { + .Width = i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w) + : hwfc->width, + .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h) + : hwfc->height, + .Depth = 0, + .Format = cufmt, + .NumChannels = 1 + ((planes == 2) && i), + .Flags = 0, + }, + .numLevels = 1, + }; + CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = { + .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, + .size = dst_f->size[i], + }; + VkMemoryGetFdInfoKHR export_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .memory = dst_f->mem[i], + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + }; + VkSemaphoreGetFdInfoKHR sem_export = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .semaphore = dst_f->sem[i], + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { + .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, + }; + + ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info, + &ext_desc.handle.fd); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n"); + err = AVERROR_EXTERNAL; + goto fail; + } + + ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i], + dst_int->ext_mem[i], + &tex_desc)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i], + dst_int->cu_mma[i], 0)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export, + &ext_sem_desc.handle.fd); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i], + &ext_sem_desc)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + } + } + return 0; + +fail: + return -err; +} + +static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, + AVFrame *dst, const AVFrame *src) +{ + int err; + VkResult ret; + CUcontext dummy; + AVVkFrame *dst_f; + AVVkFrameInternal *dst_int; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + + AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data; + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; + CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; + + ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + dst_f = (AVVkFrame *)dst->data[0]; + + ret = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst); + if (ret < 0) { + goto fail; + } + dst_int = dst_f->internal; + + ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, + planes, cuda_dev->stream)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + for (int i = 0; i < planes; i++) { + CUDA_MEMCPY2D cpy = { + .srcMemoryType = CU_MEMORYTYPE_DEVICE, + .srcDevice = (CUdeviceptr)src->data[i], + .srcPitch = src->linesize[i], + .srcY = 0, + + .dstMemoryType = CU_MEMORYTYPE_ARRAY, + .dstArray = dst_int->cu_array[i], + .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w) + : hwfc->width) * desc->comp[i].step, + .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h) + : hwfc->height, + }; + + ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + } + + ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, + planes, cuda_dev->stream)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + + av_log(hwfc, AV_LOG_VERBOSE, "Transfered CUDA image to Vulkan!\n"); + + return 0; + +fail: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + vulkan_free_internal(dst_int); + dst_f->internal = NULL; + av_buffer_unref(&dst->buf[0]); + return err; +} +#endif + +static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + + switch (src->format) { +#if CONFIG_LIBDRM +#if CONFIG_VAAPI + case AV_PIX_FMT_VAAPI: + if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY) + return vulkan_map_from_vaapi(hwfc, dst, src, flags); +#endif + case AV_PIX_FMT_DRM_PRIME: + if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY) + return vulkan_map_from_drm(hwfc, dst, src, flags); +#endif + default: + return AVERROR(ENOSYS); + } +} + +#if CONFIG_LIBDRM +typedef struct VulkanDRMMapping { + AVDRMFrameDescriptor drm_desc; + AVVkFrame *source; +} VulkanDRMMapping; + +static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) +{ + AVDRMFrameDescriptor *drm_desc = hwmap->priv; + + for (int i = 0; i < drm_desc->nb_objects; i++) + close(drm_desc->objects[i].fd); + + av_free(drm_desc); +} + +static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt) +{ + for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++) + if (vulkan_drm_format_map[i].vk_format == vkfmt) + return vulkan_drm_format_map[i].va_fourcc; + return DRM_FORMAT_INVALID; +} + +static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + int err = 0; + VkResult ret; + AVVkFrame *f = (AVVkFrame *)src->data[0]; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR); + VkImageDrmFormatModifierPropertiesEXT drm_mod = { + .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT, + }; + + AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc)); + if (!drm_desc) + return AVERROR(ENOMEM); + + err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc); + if (err < 0) + goto end; + + if (p->extensions & EXT_DRM_MODIFIER_FLAGS) { + VK_LOAD_PFN(hwctx->inst, vkGetImageDrmFormatModifierPropertiesEXT); + ret = pfn_vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0], + &drm_mod); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n"); + err = AVERROR_EXTERNAL; + goto end; + } + } + + for (int i = 0; (i < planes) && (f->mem[i]); i++) { + VkMemoryGetFdInfoKHR export_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .memory = f->mem[i], + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + + ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info, + &drm_desc->objects[i].fd); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n"); + err = AVERROR_EXTERNAL; + goto end; + } + + drm_desc->nb_objects++; + drm_desc->objects[i].size = f->size[i]; + drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier; + } + + drm_desc->nb_layers = planes; + for (int i = 0; i < drm_desc->nb_layers; i++) { + VkSubresourceLayout layout; + VkImageSubresource sub = { + .aspectMask = p->extensions & EXT_DRM_MODIFIER_FLAGS ? + VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT : + VK_IMAGE_ASPECT_COLOR_BIT, + }; + VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i]; + + drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt); + drm_desc->layers[i].nb_planes = 1; + + if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) { + av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n"); + err = AVERROR_PATCHWELCOME; + goto end; + } + + drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1); + + if (f->tiling != VK_IMAGE_TILING_OPTIMAL) + continue; + + vkGetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); + drm_desc->layers[i].planes[0].offset = layout.offset; + drm_desc->layers[i].planes[0].pitch = layout.rowPitch; + } + + dst->width = src->width; + dst->height = src->height; + dst->data[0] = (uint8_t *)drm_desc; + + av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n"); + + return 0; + +end: + av_free(drm_desc); + return err; +} + +#if CONFIG_VAAPI +static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + int err; + AVFrame *tmp = av_frame_alloc(); + if (!tmp) + return AVERROR(ENOMEM); + + tmp->format = AV_PIX_FMT_DRM_PRIME; + + err = vulkan_map_to_drm(hwfc, tmp, src, flags); + if (err < 0) + goto fail; + + err = av_hwframe_map(dst, tmp, flags); + if (err < 0) + goto fail; + + err = ff_hwframe_map_replace(dst, src); + +fail: + av_frame_free(&tmp); + return err; +} +#endif +#endif + +static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + + switch (dst->format) { +#if CONFIG_LIBDRM + case AV_PIX_FMT_DRM_PRIME: + if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY) + return vulkan_map_to_drm(hwfc, dst, src, flags); +#if CONFIG_VAAPI + case AV_PIX_FMT_VAAPI: + if (p->extensions & EXT_EXTERNAL_DMABUF_MEMORY) + return vulkan_map_to_vaapi(hwfc, dst, src, flags); +#endif +#endif + default: + return vulkan_map_frame_to_mem(hwfc, dst, src, flags); + } +} + +typedef struct ImageBuffer { + VkBuffer buf; + VkDeviceMemory mem; + VkMemoryPropertyFlagBits flags; +} ImageBuffer; + +static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf) +{ + AVVulkanDeviceContext *hwctx = ctx->hwctx; + if (!buf) + return; + + vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc); + vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc); +} + +static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, int height, + int *stride, VkBufferUsageFlags usage, + VkMemoryPropertyFlagBits flags, void *create_pnext, + void *alloc_pnext) +{ + int err; + VkResult ret; + VkMemoryRequirements req; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *p = ctx->internal->priv; + + VkBufferCreateInfo buf_spawn = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = create_pnext, + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment); + buf_spawn.size = height*(*stride); + + ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &buf->buf); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req); + + err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem); + if (err) + return err; + + ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", + vk_ret2str(ret)); + free_buf(ctx, buf); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[], + int nb_buffers, int invalidate) +{ + VkResult ret; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS]; + int invalidate_count = 0; + + for (int i = 0; i < nb_buffers; i++) { + ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0, + VK_WHOLE_SIZE, 0, (void **)&mem[i]); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + if (!invalidate) + return 0; + + for (int i = 0; i < nb_buffers; i++) { + const VkMappedMemoryRange ival_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = buf[i].mem, + .size = VK_WHOLE_SIZE, + }; + if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + continue; + invalidate_ctx[invalidate_count++] = ival_buf; + } + + if (invalidate_count) { + ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count, + invalidate_ctx); + if (ret != VK_SUCCESS) + av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n", + vk_ret2str(ret)); + } + + return 0; +} + +static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, + int nb_buffers, int flush) +{ + int err = 0; + VkResult ret; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS]; + int flush_count = 0; + + if (flush) { + for (int i = 0; i < nb_buffers; i++) { + const VkMappedMemoryRange flush_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = buf[i].mem, + .size = VK_WHOLE_SIZE, + }; + if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + continue; + flush_ctx[flush_count++] = flush_buf; + } + } + + if (flush_count) { + ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ + } + } + + for (int i = 0; i < nb_buffers; i++) + vkUnmapMemory(hwctx->act_dev, buf[i].mem); + + return err; +} + +static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame, + ImageBuffer *buffer, const int *buf_stride, int w, + int h, enum AVPixelFormat pix_fmt, int to_buf) +{ + VkResult ret; + AVVulkanDeviceContext *hwctx = ctx->hwctx; + VulkanDevicePriv *s = ctx->internal->priv; + VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS]; + + const int planes = av_pix_fmt_count_planes(pix_fmt); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + + VkCommandBufferBeginInfo cmd_start = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + + VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; + + VkSubmitInfo s_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &s->cmd.buf, + .pSignalSemaphores = frame->sem, + .pWaitSemaphores = frame->sem, + .pWaitDstStageMask = sem_wait_dst, + .signalSemaphoreCount = planes, + .waitSemaphoreCount = planes, + }; + + ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + /* Change the image layout to something more optimal for transfers */ + for (int i = 0; i < planes; i++) { + img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + img_bar[i].srcAccessMask = 0x0; + img_bar[i].dstAccessMask = to_buf ? VK_ACCESS_TRANSFER_READ_BIT : + VK_ACCESS_TRANSFER_WRITE_BIT; + img_bar[i].oldLayout = frame->layout[i]; + img_bar[i].newLayout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + img_bar[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + img_bar[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + img_bar[i].image = frame->img[i]; + img_bar[i].subresourceRange.levelCount = 1; + img_bar[i].subresourceRange.layerCount = 1; + img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + + sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + + frame->layout[i] = img_bar[i].newLayout; + frame->access[i] = img_bar[i].dstAccessMask; + } + + vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + 0, NULL, 0, NULL, planes, img_bar); + + /* Schedule a copy for each plane */ + for (int i = 0; i < planes; i++) { + const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w; + const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h; + VkBufferImageCopy buf_reg = { + .bufferOffset = 0, + /* Buffer stride isn't in bytes, it's in samples, the implementation + * uses the image's VkFormat to know how many bytes per sample + * the buffer has. So we have to convert by dividing. Stupid. + * Won't work with YUVA or other planar formats with alpha. */ + .bufferRowLength = buf_stride[i] / desc->comp[i].step, + .bufferImageHeight = p_h, + .imageSubresource.layerCount = 1, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageOffset = { 0, 0, 0, }, + .imageExtent = { p_w, p_h, 1, }, + }; + + if (to_buf) + vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i], + buffer[i].buf, 1, &buf_reg); + else + vkCmdCopyBufferToImage(s->cmd.buf, buffer[i].buf, frame->img[i], + frame->layout[i], 1, &buf_reg); + } + + ret = vkEndCommandBuffer(s->cmd.buf); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + /* Wait for the download/upload to finish if uploading, otherwise the + * semaphore will take care of synchronization when uploading */ + ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } else { + vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX); + vkResetFences(hwctx->act_dev, 1, &s->cmd.fence); + } + + return 0; +} + +/* Technically we can use VK_EXT_external_memory_host to upload and download, + * however the alignment requirements make this unfeasible as both the pointer + * and the size of each plane need to be aligned to the minimum alignment + * requirement, which on all current implementations (anv, radv) is 4096. + * If the requirement gets relaxed (unlikely) this can easily be implemented. */ +static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + int err = 0; + AVFrame tmp; + AVVkFrame *f = (AVVkFrame *)dst->data[0]; + AVHWDeviceContext *dev_ctx = hwfc->device_ctx; + ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } }; + const int planes = av_pix_fmt_count_planes(src->format); + int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h; + + if ((src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format))) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n"); + return AVERROR(EINVAL); + } + + if (src->width > hwfc->width || src->height > hwfc->height) + return AVERROR(EINVAL); + + /* For linear, host visiable images */ + if (f->tiling == VK_IMAGE_TILING_LINEAR && + f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + AVFrame *map = av_frame_alloc(); + if (!map) + return AVERROR(ENOMEM); + map->format = src->format; + + err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE); + if (err) + goto end; + + err = av_frame_copy(map, src); + av_frame_free(&map); + goto end; + } + + /* Create buffers */ + for (int i = 0; i < planes; i++) { + int h = src->height; + int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h; + + tmp.linesize[i] = src->linesize[i]; + err = create_buf(dev_ctx, &buf[i], p_height, + &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL); + if (err) + goto end; + } + + /* Map, copy image to buffer, unmap */ + if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 0))) + goto end; + + av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data, + src->linesize, src->format, src->width, src->height); + + if ((err = unmap_buffers(dev_ctx, buf, planes, 1))) + goto end; + + /* Copy buffers to image */ + err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize, + src->width, src->height, src->format, 0); + +end: + for (int i = 0; i < planes; i++) + free_buf(dev_ctx, &buf[i]); + + return err; +} + +static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + + switch (src->format) { +#if CONFIG_CUDA + case AV_PIX_FMT_CUDA: + if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) && + (p->extensions & EXT_EXTERNAL_FD_SEM)) + return vulkan_transfer_data_from_cuda(hwfc, dst, src); +#endif + default: + if (src->hw_frames_ctx) + return AVERROR(ENOSYS); + else + return vulkan_transfer_data_from_mem(hwfc, dst, src); + } +} + +#if CONFIG_CUDA +static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + int err; + VkResult ret; + CUcontext dummy; + AVVkFrame *dst_f; + AVVkFrameInternal *dst_int; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + + AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data; + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + + ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + dst_f = (AVVkFrame *)src->data[0]; + + err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src); + if (err < 0) { + goto fail; + } + + dst_int = dst_f->internal; + + for (int i = 0; i < planes; i++) { + CUDA_MEMCPY2D cpy = { + .dstMemoryType = CU_MEMORYTYPE_DEVICE, + .dstDevice = (CUdeviceptr)dst->data[i], + .dstPitch = dst->linesize[i], + .dstY = 0, + + .srcMemoryType = CU_MEMORYTYPE_ARRAY, + .srcArray = dst_int->cu_array[i], + .WidthInBytes = (i > 0 ? AV_CEIL_RSHIFT(hwfc->width, desc->log2_chroma_w) + : hwfc->width) * desc->comp[i].step, + .Height = i > 0 ? AV_CEIL_RSHIFT(hwfc->height, desc->log2_chroma_h) + : hwfc->height, + }; + + ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + } + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + + av_log(hwfc, AV_LOG_VERBOSE, "Transfered Vulkan image to CUDA!\n"); + + return 0; + +fail: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + vulkan_free_internal(dst_int); + dst_f->internal = NULL; + av_buffer_unref(&dst->buf[0]); + return err; +} +#endif + +static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + int err = 0; + AVFrame tmp; + AVVkFrame *f = (AVVkFrame *)src->data[0]; + AVHWDeviceContext *dev_ctx = hwfc->device_ctx; + ImageBuffer buf[AV_NUM_DATA_POINTERS] = { { 0 } }; + const int planes = av_pix_fmt_count_planes(dst->format); + int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h; + + if (dst->width > hwfc->width || dst->height > hwfc->height) + return AVERROR(EINVAL); + + /* For linear, host visiable images */ + if (f->tiling == VK_IMAGE_TILING_LINEAR && + f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + AVFrame *map = av_frame_alloc(); + if (!map) + return AVERROR(ENOMEM); + map->format = dst->format; + + err = vulkan_map_frame_to_mem(hwfc, map, src, AV_HWFRAME_MAP_READ); + if (err) + return err; + + err = av_frame_copy(dst, map); + av_frame_free(&map); + return err; + } + + /* Create buffers */ + for (int i = 0; i < planes; i++) { + int h = dst->height; + int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h; + + tmp.linesize[i] = dst->linesize[i]; + err = create_buf(dev_ctx, &buf[i], p_height, + &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL); + } + + /* Copy image to buffer */ + if ((err = transfer_image_buf(dev_ctx, f, buf, tmp.linesize, + dst->width, dst->height, dst->format, 1))) + goto end; + + /* Map, copy buffer to frame, unmap */ + if ((err = map_buffers(dev_ctx, buf, tmp.data, planes, 1))) + goto end; + + av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data, + tmp.linesize, dst->format, dst->width, dst->height); + + err = unmap_buffers(dev_ctx, buf, planes, 0); + +end: + for (int i = 0; i < planes; i++) + free_buf(dev_ctx, &buf[i]); + + return err; +} + +static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + + switch (dst->format) { +#if CONFIG_CUDA + case AV_PIX_FMT_CUDA: + if ((p->extensions & EXT_EXTERNAL_FD_MEMORY) && + (p->extensions & EXT_EXTERNAL_FD_SEM)) + return vulkan_transfer_data_to_cuda(hwfc, dst, src); +#endif + default: + if (dst->hw_frames_ctx) + return AVERROR(ENOSYS); + else + return vulkan_transfer_data_to_mem(hwfc, dst, src); + } +} + +const HWContextType ff_hwcontext_type_vulkan = { + .type = AV_HWDEVICE_TYPE_VULKAN, + .name = "Vulkan", + + .device_hwctx_size = sizeof(AVVulkanDeviceContext), + .device_priv_size = sizeof(VulkanDevicePriv), + .frames_hwctx_size = sizeof(AVVulkanFramesContext), + .frames_priv_size = sizeof(VulkanFramesPriv), + + .device_init = &vulkan_device_init, + .device_create = &vulkan_device_create, + .device_derive = &vulkan_device_derive, + + .frames_get_constraints = &vulkan_frames_get_constraints, + .frames_init = vulkan_frames_init, + .frames_get_buffer = vulkan_get_buffer, + .frames_uninit = vulkan_frames_uninit, + + .transfer_get_formats = vulkan_transfer_get_formats, + .transfer_data_to = vulkan_transfer_data_to, + .transfer_data_from = vulkan_transfer_data_from, + + .map_to = vulkan_map_to, + .map_from = vulkan_map_from, + + .pix_fmts = (const enum AVPixelFormat []) { + AV_PIX_FMT_VULKAN, + AV_PIX_FMT_NONE + }, +}; diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h new file mode 100644 index 0000000000000..9067a6f624133 --- /dev/null +++ b/libavutil/hwcontext_vulkan.h @@ -0,0 +1,150 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_HWCONTEXT_VULKAN_H +#define AVUTIL_HWCONTEXT_VULKAN_H + +#include + +/** + * @file + * API-specific header for AV_HWDEVICE_TYPE_VULKAN. + * + * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs + * with the data pointer set to an AVVkFrame. + */ + +/** + * Main Vulkan context, allocated as AVHWDeviceContext.hwctx. + * All of these can be set before init to change what the context uses + */ +typedef struct AVVulkanDeviceContext { + /** + * Custom memory allocator, else NULL + */ + const VkAllocationCallbacks *alloc; + /** + * Instance + */ + VkInstance inst; + /** + * Physical device + */ + VkPhysicalDevice phys_dev; + /** + * Activated physical device + */ + VkDevice act_dev; + /** + * Queue family index for graphics + */ + int queue_family_index; + /** + * Queue family index for transfer ops only. By default, the priority order + * is dedicated transfer > dedicated compute > graphics. + */ + int queue_family_tx_index; + /** + * Queue family index for compute ops. Will be equal to the graphics + * one unless a dedicated transfer queue is found. + */ + int queue_family_comp_index; + /** + * The UUID of the selected physical device. + */ + uint8_t device_uuid[VK_UUID_SIZE]; +} AVVulkanDeviceContext; + +/** + * Allocated as AVHWFramesContext.hwctx, used to set pool-specific options + */ +typedef struct AVVulkanFramesContext { + /** + * Controls the tiling of output frames. + */ + VkImageTiling tiling; + /** + * Defines extra usage of output frames. This is bitwise OR'd with the + * standard usage flags (SAMPLED, STORAGE, TRANSFER_SRC and TRANSFER_DST). + */ + VkImageUsageFlagBits usage; + /** + * Extension data for image creation. By default, if the extension is + * available, this will be chained to a VkImageFormatListCreateInfoKHR. + */ + void *create_pnext; + /** + * Extension data for memory allocation. Must have as many entries as + * the number of planes of the sw_format. + * This will be chained to VkExportMemoryAllocateInfo, which is used + * to make all pool images exportable to other APIs. + */ + void *alloc_pnext[AV_NUM_DATA_POINTERS]; +} AVVulkanFramesContext; + +/* + * Frame structure, the VkFormat of the image will always match + * the pool's sw_format. + * All frames, imported or allocated, will be created with the + * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed. + */ +typedef struct AVVkFrame { + /** + * Vulkan images to which the memory is bound to. + */ + VkImage img[AV_NUM_DATA_POINTERS]; + + /** + * Same tiling must be used for all images. + */ + VkImageTiling tiling; + + /** + * Memory backing the images. Could be less than the amount of images + * if importing from a DRM or VAAPI frame. + */ + VkDeviceMemory mem[AV_NUM_DATA_POINTERS]; + size_t size[AV_NUM_DATA_POINTERS]; + + /** + * OR'd flags for all memory allocated + */ + VkMemoryPropertyFlagBits flags; + + /** + * Updated after every barrier + */ + VkAccessFlagBits access[AV_NUM_DATA_POINTERS]; + VkImageLayout layout[AV_NUM_DATA_POINTERS]; + + /** + * Per-image semaphores. Must not be freed manually. Must be waited on + * and signalled at every queue submission. + */ + VkSemaphore sem[AV_NUM_DATA_POINTERS]; + + /** + * Internal data. + */ + struct AVVkFrameInternal *internal; +} AVVkFrame; + +/* Returns the format of each image up to the number of planes for a given sw_format. */ +const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p); + +#endif /* AVUTIL_HWCONTEXT_VULKAN_H */ diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c index 05dd4a1e20d8a..64178b7d563b9 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c @@ -2344,6 +2344,10 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { }, .flags = AV_PIX_FMT_FLAG_PLANAR, }, + [AV_PIX_FMT_VULKAN] = { + .name = "vulkan", + .flags = AV_PIX_FMT_FLAG_HWACCEL, + }, }; #if FF_API_PLUS1_MINUS1 FF_ENABLE_DEPRECATION_WARNINGS diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h index 37ecebd50122e..b2bb91defd4c6 100644 --- a/libavutil/pixfmt.h +++ b/libavutil/pixfmt.h @@ -348,6 +348,9 @@ enum AVPixelFormat { AV_PIX_FMT_NV24, ///< planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V) AV_PIX_FMT_NV42, ///< as above, but U and V bytes are swapped + /* Vulkan hardware images, data[0] contain an AVVkFrame */ + AV_PIX_FMT_VULKAN, + AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions };