Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EC/CUDA: memops supported by default in CUDA 12.0 #700

Merged
2 commits merged on Dec 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 8 additions & 4 deletions config/m4/cuda.m4
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ ARCH110="-gencode=arch=compute_80,code=sm_80 \
-gencode=arch=compute_80,code=compute_80"
ARCH111="-gencode=arch=compute_86,code=sm_86 \
-gencode=arch=compute_86,code=compute_86"
ARCH120="-gencode=arch=compute_90,code=sm_90 \
-gencode=arch=compute_90,code=compute_90"

AC_DEFUN([CHECK_CUDA],[
AS_IF([test "x$cuda_checked" != "xyes"],
Expand Down Expand Up @@ -106,10 +108,12 @@ AS_IF([test "x$cuda_checked" != "xyes"],
[NVCC_CFLAGS="$NVCC_CFLAGS -O3 -g -DNDEBUG"])
AS_IF([test "x$cuda_happy" = "xyes"],
[AS_IF([test "x$with_nvcc_gencode" = "xdefault"],
[AS_IF([test $CUDA_MAJOR_VERSION -eq 11],
[AS_IF([test $CUDA_MINOR_VERSION -lt 1],
[NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110}"],
[NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110} ${ARCH111}"])])],
[AS_IF([test $CUDA_MAJOR_VERSION -eq 12],
[NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110} ${ARCH111} ${ARCH120}"],
[AS_IF([test $CUDA_MAJOR_VERSION -eq 11],
[AS_IF([test $CUDA_MINOR_VERSION -lt 1],
[NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110}"],
[NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110} ${ARCH111}"])])])],
[NVCC_ARCH="$with_nvcc_gencode"])
AC_SUBST([NVCC_ARCH], ["$NVCC_ARCH"])])
LDFLAGS="$save_LDFLAGS"
Expand Down
12 changes: 7 additions & 5 deletions src/components/ec/cuda/ec_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,11 +243,8 @@ static ucc_status_t ucc_ec_cuda_init(const ucc_ec_params_t *ec_params)
{
ucc_ec_cuda_config_t *cfg = EC_CUDA_CONFIG;
ucc_status_t status;
int device, num_devices, attr;
CUdevice cu_dev;
CUresult cu_st;
int device, num_devices;
cudaError_t cuda_st;
const char *cu_err_st_str;
struct cudaDeviceProp prop;
int supportsCoopLaunch = 0;

Expand Down Expand Up @@ -351,9 +348,13 @@ static ucc_status_t ucc_ec_cuda_init(const ucc_ec_params_t *ec_params)
} else {
ucc_ec_cuda.strm_task_mode = UCC_EC_CUDA_TASK_MEM_OPS;
ucc_ec_cuda.post_strm_task = ucc_ec_cuda_post_driver_stream_task;

#if CUDA_VERSION < 12000
CUresult cu_st;
CUdevice cu_dev;
int attr;
cu_st = cuCtxGetDevice(&cu_dev);
if (cu_st != CUDA_SUCCESS){
const char *cu_err_st_str;
cuGetErrorString(cu_st, &cu_err_st_str);
ec_debug(&ucc_ec_cuda.super, "cuCtxGetDevice() failed: %s",
cu_err_st_str);
Expand All @@ -376,6 +377,7 @@ static ucc_status_t ucc_ec_cuda_init(const ucc_ec_params_t *ec_params)
"CUDA MEM OPS are not supported or disabled");
return UCC_ERR_NOT_SUPPORTED;
}
#endif
}

if (cfg->use_cooperative_launch == 1) {
Expand Down
8 changes: 6 additions & 2 deletions src/components/tl/nccl/tl_nccl_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,14 @@ UCC_CLASS_INIT_FUNC(ucc_tl_nccl_context_t,
ucc_derived_of(config, ucc_tl_nccl_context_config_t);
int mem_ops_attr = 0;
ucc_status_t status;
CUresult cu_st;
CUdevice cu_dev;

UCC_CLASS_CALL_SUPER_INIT(ucc_tl_context_t, &tl_nccl_config->super,
params->context);
memcpy(&self->cfg, tl_nccl_config, sizeof(*tl_nccl_config));
if (self->cfg.sync_type != UCC_TL_NCCL_COMPLETION_SYNC_TYPE_EVENT) {
#if CUDA_VERSION < 12000
CUresult cu_st;
CUdevice cu_dev;
cu_st = cuCtxGetDevice(&cu_dev);
if (cu_st == CUDA_SUCCESS) {
cu_st = cuDeviceGetAttribute(&mem_ops_attr,
Expand All @@ -124,6 +125,9 @@ UCC_CLASS_INIT_FUNC(ucc_tl_nccl_context_t,
} else {
tl_info(self->super.super.lib, "failed to get cuda device");
}
#else
mem_ops_attr = 1;
#endif
if (mem_ops_attr == 0) {
if (self->cfg.sync_type == UCC_TL_NCCL_COMPLETION_SYNC_TYPE_MEMOPS) {
tl_error(self->super.super.lib, "memops not supported");
Expand Down