openucx · Sergei-Lebedev · Dec 28, 2022 · Dec 22, 2022 · Dec 26, 2022
diff --git a/config/m4/cuda.m4 b/config/m4/cuda.m4
@@ -17,6 +17,8 @@ ARCH110="-gencode=arch=compute_80,code=sm_80 \
 -gencode=arch=compute_80,code=compute_80"
 ARCH111="-gencode=arch=compute_86,code=sm_86 \
 -gencode=arch=compute_86,code=compute_86"
+ARCH120="-gencode=arch=compute_90,code=sm_90 \
+-gencode=arch=compute_90,code=compute_90"
 
 AC_DEFUN([CHECK_CUDA],[
 AS_IF([test "x$cuda_checked" != "xyes"],
@@ -106,10 +108,12 @@ AS_IF([test "x$cuda_checked" != "xyes"],
                [NVCC_CFLAGS="$NVCC_CFLAGS -O3 -g -DNDEBUG"])
          AS_IF([test "x$cuda_happy" = "xyes"],
                [AS_IF([test "x$with_nvcc_gencode" = "xdefault"],
-                      [AS_IF([test $CUDA_MAJOR_VERSION -eq 11],
-			     [AS_IF([test $CUDA_MINOR_VERSION -lt 1],
-                                    [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110}"],
-                                    [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110} ${ARCH111}"])])],
+                      [AS_IF([test $CUDA_MAJOR_VERSION -eq 12],
+                             [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110} ${ARCH111} ${ARCH120}"],
+                             [AS_IF([test $CUDA_MAJOR_VERSION -eq 11],
+                                   [AS_IF([test $CUDA_MINOR_VERSION -lt 1],
+                                           [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110}"],
+                                           [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110} ${ARCH111}"])])])],
                       [NVCC_ARCH="$with_nvcc_gencode"])
                 AC_SUBST([NVCC_ARCH], ["$NVCC_ARCH"])])
          LDFLAGS="$save_LDFLAGS"

diff --git a/src/components/ec/cuda/ec_cuda.c b/src/components/ec/cuda/ec_cuda.c
@@ -243,11 +243,8 @@ static ucc_status_t ucc_ec_cuda_init(const ucc_ec_params_t *ec_params)
 {
     ucc_ec_cuda_config_t *cfg = EC_CUDA_CONFIG;
     ucc_status_t          status;
-    int                   device, num_devices, attr;
-    CUdevice              cu_dev;
-    CUresult              cu_st;
+    int                   device, num_devices;
     cudaError_t           cuda_st;
-    const char           *cu_err_st_str;
     struct cudaDeviceProp prop;
     int                   supportsCoopLaunch = 0;
 
@@ -351,9 +348,13 @@ static ucc_status_t ucc_ec_cuda_init(const ucc_ec_params_t *ec_params)
     } else {
         ucc_ec_cuda.strm_task_mode = UCC_EC_CUDA_TASK_MEM_OPS;
         ucc_ec_cuda.post_strm_task = ucc_ec_cuda_post_driver_stream_task;
-
+#if CUDA_VERSION < 12000
+        CUresult cu_st;
+        CUdevice cu_dev;
+        int attr;
         cu_st = cuCtxGetDevice(&cu_dev);
         if (cu_st != CUDA_SUCCESS){
+            const char *cu_err_st_str;
             cuGetErrorString(cu_st, &cu_err_st_str);
             ec_debug(&ucc_ec_cuda.super, "cuCtxGetDevice() failed: %s",
                      cu_err_st_str);
@@ -376,6 +377,7 @@ static ucc_status_t ucc_ec_cuda_init(const ucc_ec_params_t *ec_params)
                      "CUDA MEM OPS are not supported or disabled");
             return UCC_ERR_NOT_SUPPORTED;
         }
+#endif
     }
 
     if (cfg->use_cooperative_launch == 1) {

diff --git a/src/components/tl/nccl/tl_nccl_context.c b/src/components/tl/nccl/tl_nccl_context.c
@@ -109,13 +109,14 @@ UCC_CLASS_INIT_FUNC(ucc_tl_nccl_context_t,
         ucc_derived_of(config, ucc_tl_nccl_context_config_t);
     int mem_ops_attr = 0;
     ucc_status_t status;
-    CUresult cu_st;
-    CUdevice cu_dev;
 
     UCC_CLASS_CALL_SUPER_INIT(ucc_tl_context_t, &tl_nccl_config->super,
                               params->context);
     memcpy(&self->cfg, tl_nccl_config, sizeof(*tl_nccl_config));
     if (self->cfg.sync_type != UCC_TL_NCCL_COMPLETION_SYNC_TYPE_EVENT) {
+#if CUDA_VERSION < 12000
+        CUresult cu_st;
+        CUdevice cu_dev;
         cu_st = cuCtxGetDevice(&cu_dev);
         if (cu_st == CUDA_SUCCESS) {
             cu_st = cuDeviceGetAttribute(&mem_ops_attr,
@@ -124,6 +125,9 @@ UCC_CLASS_INIT_FUNC(ucc_tl_nccl_context_t,
         } else {
             tl_info(self->super.super.lib, "failed to get cuda device");
         }
+#else
+        mem_ops_attr = 1;
+#endif
         if (mem_ops_attr == 0) {
             if (self->cfg.sync_type == UCC_TL_NCCL_COMPLETION_SYNC_TYPE_MEMOPS) {
                 tl_error(self->super.super.lib, "memops not supported");