From 93d7c6c0025fc8553d4d1298473350facbc87c2c Mon Sep 17 00:00:00 2001 From: Jrelvas <55360900+jrelvas-ipc@users.noreply.github.com> Date: Tue, 30 Apr 2024 21:17:52 +0100 Subject: [PATCH] hwdec_cuda: avoid gpu wakeup by deferring cuInit `cuInit` wakes up the nvidia dgpu on nvidia laptops. This is bad news because the wake up process is blocking and takes a few seconds. It also needlessly increases power consumption. Sometimes, a VO loads several hwdecs (like `dmabuf_wayland`). When `cuda` is loaded, it calls `cuInit` before running all interop inits. However, the first checks in the interops do not require cuda initialization, so we only need to call `cuInit` after those checks. This commit splits the interop `init` function into `check` and `init`. `check` can be called without initializing the Cuda backend, so cuInit is only called *after* the first interop check. With these changes, there's no cuda initialization if no OpenGL/Vulkan backend is available. This prevents `dmabuf_wayland` and other VOs which automatically load cuda from waking up the nvidia dgpu unnecessarily, making them start faster and decreasing power consumption on laptops. Fixes: https://github.com/mpv-player/mpv/issues/13668 --- video/out/hwdec/hwdec_cuda.c | 47 ++++++++++++++++++++------------- video/out/hwdec/hwdec_cuda.h | 9 ++++--- video/out/hwdec/hwdec_cuda_gl.c | 19 +++++++++---- video/out/hwdec/hwdec_cuda_vk.c | 21 ++++++++++----- 4 files changed, 64 insertions(+), 32 deletions(-) diff --git a/video/out/hwdec/hwdec_cuda.c b/video/out/hwdec/hwdec_cuda.c index 57e4fb40e357..8987cf3407ec 100644 --- a/video/out/hwdec/hwdec_cuda.c +++ b/video/out/hwdec/hwdec_cuda.c @@ -57,12 +57,12 @@ int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func) #define CHECK_CU(x) check_cu(hw, (x), #x) -static const cuda_interop_init interop_inits[] = { +static const struct cuda_interop_fn *interop_fns[] = { #if HAVE_GL - cuda_gl_init, + &cuda_gl_fn, #endif #if HAVE_VULKAN - cuda_vk_init, + &cuda_vk_fn, #endif NULL }; @@ -73,25 +73,36 @@ static int cuda_init(struct ra_hwdec *hw) CUcontext dummy; int ret = 0; struct cuda_hw_priv *p = hw->priv; - CudaFunctions *cu; + CudaFunctions *cu = NULL; int level = hw->probing ? MSGL_V : MSGL_ERR; - - ret = cuda_load_functions(&p->cu, NULL); - if (ret != 0) { - MP_MSG(hw, level, "Failed to load CUDA symbols\n"); - return -1; - } - cu = p->cu; - - ret = CHECK_CU(cu->cuInit(0)); - if (ret < 0) - return -1; + bool initialized = false; // Initialise CUDA context from backend. - for (int i = 0; interop_inits[i]; i++) { - if (interop_inits[i](hw)) { - break; + // Note that the interop check doesn't require the CUDA backend to be initialized. + // This is important because cuInit wakes up the dgpu (even if the cuda hwdec won't be used!) + // Doing this allows us to check if CUDA should be used without waking up the dgpu, avoiding + // a few seconds of delay and improving battery life for laptops! + for (int i = 0; interop_fns[i]; i++) { + if (!interop_fns[i]->check(hw)) + continue; + + if (!initialized) { + ret = cuda_load_functions(&p->cu, NULL); + if (ret != 0) { + MP_MSG(hw, level, "Failed to load CUDA symbols\n"); + return -1; + } + + cu = p->cu; + ret = CHECK_CU(cu->cuInit(0)); + if (ret < 0) + return -1; + + initialized = true; } + + if (interop_fns[i]->init(hw)) + break; } if (!p->ext_init || !p->ext_uninit) { diff --git a/video/out/hwdec/hwdec_cuda.h b/video/out/hwdec/hwdec_cuda.h index 9c55053d5933..6e671b364e3e 100644 --- a/video/out/hwdec/hwdec_cuda.h +++ b/video/out/hwdec/hwdec_cuda.h @@ -50,10 +50,13 @@ struct cuda_mapper_priv { void *ext[4]; }; -typedef bool (*cuda_interop_init)(const struct ra_hwdec *hw); +struct cuda_interop_fn { + bool (*check)(const struct ra_hwdec *hw); + bool (*init)(const struct ra_hwdec *hw); +}; -bool cuda_gl_init(const struct ra_hwdec *hw); +extern struct cuda_interop_fn cuda_gl_fn; -bool cuda_vk_init(const struct ra_hwdec *hw); +extern struct cuda_interop_fn cuda_vk_fn; int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func); diff --git a/video/out/hwdec/hwdec_cuda_gl.c b/video/out/hwdec/hwdec_cuda_gl.c index f20540ed4d3c..d4c0104c8daa 100644 --- a/video/out/hwdec/hwdec_cuda_gl.c +++ b/video/out/hwdec/hwdec_cuda_gl.c @@ -106,11 +106,7 @@ static void cuda_ext_gl_uninit(const struct ra_hwdec_mapper *mapper, int n) #undef CHECK_CU #define CHECK_CU(x) check_cu(hw, (x), #x) -bool cuda_gl_init(const struct ra_hwdec *hw) { - int ret = 0; - struct cuda_hw_priv *p = hw->priv; - CudaFunctions *cu = p->cu; - +static bool cuda_gl_check(const struct ra_hwdec *hw) { if (ra_is_gl(hw->ra_ctx->ra)) { GL *gl = ra_gl_get(hw->ra_ctx->ra); if (gl->version < 210 && gl->es < 300) { @@ -122,6 +118,14 @@ bool cuda_gl_init(const struct ra_hwdec *hw) { return false; } + return true; +} + +static bool cuda_gl_init(const struct ra_hwdec *hw) { + int ret = 0; + struct cuda_hw_priv *p = hw->priv; + CudaFunctions *cu = p->cu; + CUdevice display_dev; unsigned int device_count; ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1, @@ -172,3 +176,8 @@ bool cuda_gl_init(const struct ra_hwdec *hw) { return true; } + +struct cuda_interop_fn cuda_gl_fn = { + .check = cuda_gl_check, + .init = cuda_gl_init +}; diff --git a/video/out/hwdec/hwdec_cuda_vk.c b/video/out/hwdec/hwdec_cuda_vk.c index b9f8caa8150d..e1aca5be1693 100644 --- a/video/out/hwdec/hwdec_cuda_vk.c +++ b/video/out/hwdec/hwdec_cuda_vk.c @@ -272,12 +272,7 @@ static bool cuda_ext_vk_signal(const struct ra_hwdec_mapper *mapper, int n) #undef CHECK_CU #define CHECK_CU(x) check_cu(hw, (x), #x) -bool cuda_vk_init(const struct ra_hwdec *hw) { - int ret = 0; - int level = hw->probing ? MSGL_V : MSGL_ERR; - struct cuda_hw_priv *p = hw->priv; - CudaFunctions *cu = p->cu; - +static bool cuda_vk_check(const struct ra_hwdec *hw) { pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra); if (gpu != NULL) { if (!(gpu->export_caps.tex & HANDLE_TYPE)) { @@ -294,6 +289,16 @@ bool cuda_vk_init(const struct ra_hwdec *hw) { return false; } + return true; +} + +static bool cuda_vk_init(const struct ra_hwdec *hw) { + int ret = 0; + int level = hw->probing ? MSGL_V : MSGL_ERR; + struct cuda_hw_priv *p = hw->priv; + CudaFunctions *cu = p->cu; + pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra); + if (!cu->cuImportExternalMemory) { MP_MSG(hw, level, "CUDA hwdec with Vulkan requires driver version 410.48 or newer.\n"); return false; @@ -342,3 +347,7 @@ bool cuda_vk_init(const struct ra_hwdec *hw) { return true; } +struct cuda_interop_fn cuda_vk_fn = { + .check = cuda_vk_check, + .init = cuda_vk_init +};