diff --git a/video/out/hwdec/hwdec_cuda.c b/video/out/hwdec/hwdec_cuda.c
index 57e4fb40e3579..c438bfdf021a9 100644
--- a/video/out/hwdec/hwdec_cuda.c
+++ b/video/out/hwdec/hwdec_cuda.c
@@ -57,12 +57,13 @@ int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func)
 
 #define CHECK_CU(x) check_cu(hw, (x), #x)
 
-static const cuda_interop_init interop_inits[] = {
+// NULL-terminated list of interop backends, tried in order by cuda_init().
+static const cuda_interop_fn *interop_fns[] = {
 #if HAVE_GL
-    cuda_gl_init,
+    &(cuda_interop_fn) {.check = cuda_gl_check, .init = cuda_gl_init},
 #endif
 #if HAVE_VULKAN
-    cuda_vk_init,
+    &(cuda_interop_fn) {.check = cuda_vk_check, .init = cuda_vk_init},
 #endif
     NULL
 };
@@ -75,23 +76,35 @@ static int cuda_init(struct ra_hwdec *hw)
     struct cuda_hw_priv *p = hw->priv;
     CudaFunctions *cu;
     int level = hw->probing ? MSGL_V : MSGL_ERR;
+    bool initialized = false;
 
-    ret = cuda_load_functions(&p->cu, NULL);
-    if (ret != 0) {
-        MP_MSG(hw, level, "Failed to load CUDA symbols\n");
-        return -1;
-    }
-    cu = p->cu;
-
-    ret = CHECK_CU(cu->cuInit(0));
-    if (ret < 0)
-        return -1;
-
     // Initialise CUDA context from backend.
-    for (int i = 0; interop_inits[i]; i++) {
-        if (interop_inits[i](hw)) {
-            break;
+    // Note that the interop check doesn't require the CUDA backend to be initialized.
+    // This is important because cuInit wakes up the dgpu (even if the cuda hwdec won't be used!)
+    // Doing this allows us to check if CUDA should be used without waking up the dgpu, avoiding
+    // a few seconds of delay and improving battery life for laptops!
+    for (int i = 0; interop_fns[i]; i++) {
+        if (!interop_fns[i]->check(hw))
+            continue;
+
+        // Load and initialise the CUDA backend lazily, once an interop passes its check.
+        if (!initialized) {
+            ret = cuda_load_functions(&p->cu, NULL);
+            if (ret != 0) {
+                MP_MSG(hw, level, "Failed to load CUDA symbols\n");
+                return -1;
+            }
+            cu = p->cu;
+
+            ret = CHECK_CU(cu->cuInit(0));
+            if (ret < 0)
+                return -1;
+
+            initialized = true;
         }
+
+        if (interop_fns[i]->init(hw))
+            break;
     }
 
     if (!p->ext_init || !p->ext_uninit) {
diff --git a/video/out/hwdec/hwdec_cuda.h b/video/out/hwdec/hwdec_cuda.h
index 9c55053d59334..8c82860164e12 100644
--- a/video/out/hwdec/hwdec_cuda.h
+++ b/video/out/hwdec/hwdec_cuda.h
@@ -50,10 +50,19 @@ struct cuda_mapper_priv {
     void *ext[4];
 };
 
-typedef bool (*cuda_interop_init)(const struct ra_hwdec *hw);
+// Entry points of one interop backend. check() is called before the CUDA
+// backend is loaded/initialised; init() is only called afterwards.
+typedef struct cuda_interop_fn {
+    bool (*check)(const struct ra_hwdec *hw);
+    bool (*init)(const struct ra_hwdec *hw);
+} cuda_interop_fn;
+
+bool cuda_gl_check(const struct ra_hwdec *hw);
 
 bool cuda_gl_init(const struct ra_hwdec *hw);
 
+bool cuda_vk_check(const struct ra_hwdec *hw);
+
 bool cuda_vk_init(const struct ra_hwdec *hw);
 
 int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func);
diff --git a/video/out/hwdec/hwdec_cuda_gl.c b/video/out/hwdec/hwdec_cuda_gl.c
index f20540ed4d3cd..17b9d992c3af2 100644
--- a/video/out/hwdec/hwdec_cuda_gl.c
+++ b/video/out/hwdec/hwdec_cuda_gl.c
@@ -106,6 +106,21 @@ static void cuda_ext_gl_uninit(const struct ra_hwdec_mapper *mapper, int n)
 #undef CHECK_CU
 #define CHECK_CU(x) check_cu(hw, (x), #x)
 
+// Returns true if the RA is OpenGL and reports OpenGL >= 2.1 or OpenGL-ES >= 3.0.
+// Makes no CUDA calls, so it may run before the CUDA backend is initialised.
+bool cuda_gl_check(const struct ra_hwdec *hw) {
+    if (ra_is_gl(hw->ra_ctx->ra)) {
+        GL *gl = ra_gl_get(hw->ra_ctx->ra);
+        if (gl->version < 210 && gl->es < 300) {
+            MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n");
+            return false;
+        }
+        return true;
+    }
+    // This is not an OpenGL RA.
+    return false;
+}
+
 bool cuda_gl_init(const struct ra_hwdec *hw) {
     int ret = 0;
     struct cuda_hw_priv *p = hw->priv;
diff --git a/video/out/hwdec/hwdec_cuda_vk.c b/video/out/hwdec/hwdec_cuda_vk.c
index b9f8caa8150d0..69d8bf4f1247a 100644
--- a/video/out/hwdec/hwdec_cuda_vk.c
+++ b/video/out/hwdec/hwdec_cuda_vk.c
@@ -272,13 +272,11 @@ static bool cuda_ext_vk_signal(const struct ra_hwdec_mapper *mapper, int n)
 #undef CHECK_CU
 #define CHECK_CU(x) check_cu(hw, (x), #x)
 
-bool cuda_vk_init(const struct ra_hwdec *hw) {
-    int ret = 0;
-    int level = hw->probing ? MSGL_V : MSGL_ERR;
-    struct cuda_hw_priv *p = hw->priv;
-    CudaFunctions *cu = p->cu;
-
+// Returns true only when ra_pl_get() yields a pl_gpu that passes the export
+// capability checks below; returns false otherwise (including non-Vulkan RAs).
+bool cuda_vk_check(const struct ra_hwdec *hw) {
     pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra);
+
     if (gpu != NULL) {
         if (!(gpu->export_caps.tex & HANDLE_TYPE)) {
             MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires exportable texture memory of type 0x%X.\n",
@@ -289,10 +287,18 @@ bool cuda_vk_init(const struct ra_hwdec *hw) {
                        HANDLE_TYPE);
             return false;
         }
-    } else {
-        // This is not a Vulkan RA.
-        return false;
+        return true;
     }
+    // This is not a Vulkan RA.
+    return false;
+}
+
+bool cuda_vk_init(const struct ra_hwdec *hw) {
+    int ret = 0;
+    int level = hw->probing ? MSGL_V : MSGL_ERR;
+    struct cuda_hw_priv *p = hw->priv;
+    CudaFunctions *cu = p->cu;
+    pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra);
 
     if (!cu->cuImportExternalMemory) {
         MP_MSG(hw, level, "CUDA hwdec with Vulkan requires driver version 410.48 or newer.\n");