diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index 26978d0867e..2b17884fef7 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -508,5 +508,12 @@ ompi_datatype_consolidate_free(ompi_datatype_consolidate_t *dtmod) */ #define OMPI_DATATYPE_CONSOLIDATE_THRESHOLD 250 +/* Return userbuf offset by dt's true lower bound, i.e. the address where the data described by dt begins. */ +static inline void * +BUF_START(void *userbuf, MPI_Datatype dt) +{ + return (char *)userbuf + dt->super.true_lb; +} + END_C_DECLS #endif /* OMPI_DATATYPE_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/mca/coll/cuda/coll_cuda_allreduce.c b/ompi/mca/coll/cuda/coll_cuda_allreduce.c index 416c9c7fa8f..79376f8504b 100644 --- a/ompi/mca/coll/cuda/coll_cuda_allreduce.c +++ b/ompi/mca/coll/cuda/coll_cuda_allreduce.c @@ -3,6 +3,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,7 +42,7 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, bufsize = opal_datatype_span(&dtype->super, count, &gap); - if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs(BUF_START((char *)sbuf, dtype), NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { return OMPI_ERR_OUT_OF_RESOURCE; @@ -50,7 +51,7 @@ mca_coll_cuda_allreduce(const void *sbuf, void *rbuf, int count, sbuf = sbuf1 - gap; } - if (opal_cuda_check_bufs(rbuf, NULL)) { + if (opal_cuda_check_bufs(BUF_START(rbuf, dtype), NULL)) { rbuf1 = (char*)malloc(bufsize); if (NULL == rbuf1) { if (NULL != sbuf1) free(sbuf1); diff --git a/ompi/mca/coll/cuda/coll_cuda_exscan.c b/ompi/mca/coll/cuda/coll_cuda_exscan.c index 5f736697fe0..2fcfc5ebdc4 100644 --- a/ompi/mca/coll/cuda/coll_cuda_exscan.c +++ b/ompi/mca/coll/cuda/coll_cuda_exscan.c @@ -3,6 +3,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. 
All rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,7 +34,7 @@ int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, bufsize = opal_datatype_span(&dtype->super, count, &gap); - if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs(BUF_START((char *)sbuf, dtype), NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { return OMPI_ERR_OUT_OF_RESOURCE; @@ -42,7 +43,7 @@ int mca_coll_cuda_exscan(const void *sbuf, void *rbuf, int count, sbuf = sbuf1 - gap; } - if (opal_cuda_check_bufs(rbuf, NULL)) { + if (opal_cuda_check_bufs(BUF_START(rbuf, dtype), NULL)) { rbuf1 = (char*)malloc(bufsize); if (NULL == rbuf1) { if (NULL != sbuf1) free(sbuf1); diff --git a/ompi/mca/coll/cuda/coll_cuda_reduce.c b/ompi/mca/coll/cuda/coll_cuda_reduce.c index 5d82667b6bb..6ad5d54dc59 100644 --- a/ompi/mca/coll/cuda/coll_cuda_reduce.c +++ b/ompi/mca/coll/cuda/coll_cuda_reduce.c @@ -3,6 +3,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -43,7 +44,7 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, bufsize = opal_datatype_span(&dtype->super, count, &gap); - if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs(BUF_START((char *)sbuf, dtype), NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { return OMPI_ERR_OUT_OF_RESOURCE; @@ -53,7 +54,7 @@ mca_coll_cuda_reduce(const void *sbuf, void *rbuf, int count, sbuf = sbuf1 - gap; } - if (opal_cuda_check_bufs(rbuf, NULL)) { + if (opal_cuda_check_bufs(BUF_START(rbuf, dtype), NULL)) { rbuf1 = (char*)malloc(bufsize); if (NULL == rbuf1) { if (NULL != sbuf1) free(sbuf1); diff --git a/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c b/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c index 907257b0da8..cd1ba5894fe 100644 --- a/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c +++ b/ompi/mca/coll/cuda/coll_cuda_reduce_scatter_block.c @@ -3,6 +3,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -47,7 +48,7 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, sbufsize = rbufsize * ompi_comm_size(comm); - if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs(BUF_START((char *)sbuf, dtype), NULL))) { sbuf1 = (char*)malloc(sbufsize); if (NULL == sbuf1) { return OMPI_ERR_OUT_OF_RESOURCE; @@ -56,7 +57,7 @@ mca_coll_cuda_reduce_scatter_block(const void *sbuf, void *rbuf, int rcount, sbuf = sbuf1 - gap; } - if (opal_cuda_check_bufs(rbuf, NULL)) { + if (opal_cuda_check_bufs(BUF_START(rbuf, dtype), NULL)) { rbuf1 = (char*)malloc(rbufsize); if (NULL == rbuf1) { if (NULL != sbuf1) free(sbuf1); diff --git a/ompi/mca/coll/cuda/coll_cuda_scan.c b/ompi/mca/coll/cuda/coll_cuda_scan.c index 4e7300c12f8..24e95753d90 100644 --- a/ompi/mca/coll/cuda/coll_cuda_scan.c +++ b/ompi/mca/coll/cuda/coll_cuda_scan.c @@ -3,6 +3,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2014-2015 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -40,7 +41,7 @@ int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, bufsize = opal_datatype_span(&dtype->super, count, &gap); - if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs((char *)sbuf, NULL))) { + if ((MPI_IN_PLACE != sbuf) && (opal_cuda_check_bufs(BUF_START((char *)sbuf, dtype), NULL))) { sbuf1 = (char*)malloc(bufsize); if (NULL == sbuf1) { return OMPI_ERR_OUT_OF_RESOURCE; @@ -49,7 +50,7 @@ int mca_coll_cuda_scan(const void *sbuf, void *rbuf, int count, sbuf = sbuf1 - gap; } - if (opal_cuda_check_bufs(rbuf, NULL)) { + if (opal_cuda_check_bufs(BUF_START(rbuf, dtype), NULL)) { rbuf1 = (char*)malloc(bufsize); if (NULL == rbuf1) { if (NULL != sbuf1) free(sbuf1); diff --git a/ompi/mca/coll/libnbc/nbc_ialltoall.c b/ompi/mca/coll/libnbc/nbc_ialltoall.c index ebed3c4c3b2..78a57cd8155 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoall.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoall.c @@ -10,7 +10,7 @@ * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2017-2021 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * @@ -146,7 +146,7 @@ static int nbc_alltoall_init(const void* sendbuf, int sendcount, MPI_Datatype se /* phase 1 - rotate n data blocks upwards into the tmpbuffer */ #if OPAL_CUDA_SUPPORT - if (NBC_Type_intrinsic(sendtype) && !(opal_cuda_check_bufs((char *)sendbuf, (char *)recvbuf))) { + if (NBC_Type_intrinsic(sendtype) && !(opal_cuda_check_bufs(BUF_START((char *)sendbuf, sendtype), BUF_START((char *)recvbuf, recvtype)))) { #else if (NBC_Type_intrinsic(sendtype)) { #endif /* OPAL_CUDA_SUPPORT */ diff --git a/ompi/mca/common/ompio/common_ompio_buffer.c b/ompi/mca/common/ompio/common_ompio_buffer.c index dbd7e30e6b4..48eaff52682 100644 --- a/ompi/mca/common/ompio/common_ompio_buffer.c +++ b/ompi/mca/common/ompio/common_ompio_buffer.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2019 University of Houston. All rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,8 +40,9 @@ static void* mca_common_ompio_buffer_alloc_seg ( void *ctx, size_t *size ); static void mca_common_ompio_buffer_free_seg ( void *ctx, void *buf ); #if OPAL_CUDA_SUPPORT -void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf, int *is_gpu, - int *is_managed) +void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf, + struct ompi_datatype_t *datatype, + int *is_gpu, int *is_managed) { opal_convertor_t convertor; @@ -48,6 +50,7 @@ void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf, int *is *is_managed=0; convertor.flags=0; + convertor.pDesc = &datatype->super; if ( opal_cuda_check_one_buf ( (char *)buf, &convertor ) ) { *is_gpu = 1; if ( convertor.flags & CONVERTOR_CUDA_UNIFIED ){ diff --git a/ompi/mca/common/ompio/common_ompio_buffer.h b/ompi/mca/common/ompio/common_ompio_buffer.h index 2e5e7fcbb4c..480fa34ed54 100644 --- 
a/ompi/mca/common/ompio/common_ompio_buffer.h +++ b/ompi/mca/common/ompio/common_ompio_buffer.h @@ -11,6 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2019 University of Houston. All rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -60,6 +61,7 @@ #if OPAL_CUDA_SUPPORT void mca_common_ompio_check_gpu_buf ( ompio_file_t *fh, const void *buf, + struct ompi_datatype_t *datatype, int *is_gpu, int *is_managed); #endif int mca_common_ompio_buffer_alloc_init ( void ); diff --git a/ompi/mca/common/ompio/common_ompio_file_read.c b/ompi/mca/common/ompio/common_ompio_file_read.c index 695b291fe76..a8b32bf0ead 100644 --- a/ompi/mca/common/ompio/common_ompio_file_read.c +++ b/ompi/mca/common/ompio/common_ompio_file_read.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2019 University of Houston. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -92,7 +93,7 @@ int mca_common_ompio_file_read (ompio_file_t *fh, opal_convertor_t convertor; #if OPAL_CUDA_SUPPORT int is_gpu, is_managed; - mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed); + mca_common_ompio_check_gpu_buf ( fh, buf, datatype, &is_gpu, &is_managed); if ( is_gpu && !is_managed ) { need_to_copy = true; } @@ -271,7 +272,7 @@ int mca_common_ompio_file_iread (ompio_file_t *fh, #if OPAL_CUDA_SUPPORT int is_gpu, is_managed; - mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed); + mca_common_ompio_check_gpu_buf ( fh, buf, datatype, &is_gpu, &is_managed); if ( is_gpu && !is_managed ) { need_to_copy = true; } diff --git a/ompi/mca/common/ompio/common_ompio_file_write.c b/ompi/mca/common/ompio/common_ompio_file_write.c index 066afb8844a..ea4d93d17ea 100644 --- a/ompi/mca/common/ompio/common_ompio_file_write.c +++ b/ompi/mca/common/ompio/common_ompio_file_write.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2019 University of Houston. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -73,7 +74,7 @@ int mca_common_ompio_file_write (ompio_file_t *fh, #if OPAL_CUDA_SUPPORT int is_gpu, is_managed; - mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed); + mca_common_ompio_check_gpu_buf ( fh, buf, datatype, &is_gpu, &is_managed); if ( is_gpu && !is_managed ) { need_to_copy = true; } @@ -246,7 +247,7 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh, #if OPAL_CUDA_SUPPORT int is_gpu, is_managed; - mca_common_ompio_check_gpu_buf ( fh, buf, &is_gpu, &is_managed); + mca_common_ompio_check_gpu_buf ( fh, buf, datatype, &is_gpu, &is_managed); if ( is_gpu && !is_managed ) { need_to_copy = true; } diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index e08265b42bc..e5a243cfa3a 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -15,6 +15,7 @@ * Copyright (c) 2013-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Intel, Inc. All rights reserved + * Copyright (c) 2021 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -580,6 +581,11 @@ int32_t opal_convertor_prepare_for_recv(opal_convertor_t *convertor, convertor->flags |= CONVERTOR_RECV; #if OPAL_CUDA_SUPPORT if (!(convertor->flags & CONVERTOR_SKIP_CUDA_INIT)) { + /* set pDesc (normally done later by CONVERTOR_PREPARE) early + * so that the cuda code can figure out what offset + * from pUserBuf to look at + */ + convertor->pDesc = (opal_datatype_t*)datatype; mca_cuda_convertor_init(convertor, pUserBuf); } #endif @@ -622,6 +628,7 @@ int32_t opal_convertor_prepare_for_send(opal_convertor_t *convertor, convertor->flags |= CONVERTOR_SEND; #if OPAL_CUDA_SUPPORT if (!(convertor->flags & CONVERTOR_SKIP_CUDA_INIT)) { + convertor->pDesc = (opal_datatype_t*)datatype; mca_cuda_convertor_init(convertor, pUserBuf); } #endif diff --git a/opal/mca/common/cuda/common_cuda.c b/opal/mca/common/cuda/common_cuda.c index 2fdc4b100e3..bd2f325a294 100644 --- a/opal/mca/common/cuda/common_cuda.c +++ b/opal/mca/common/cuda/common_cuda.c @@ -14,6 +14,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2021 IBM Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -1698,6 +1699,25 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t CUmemorytype memType = 0; CUdeviceptr dbuf = (CUdeviceptr) pUserBuf; CUcontext ctx = NULL, memCtx = NULL; + + /* + * If a convertor is provided, it needs to have a .pDesc set + * and then we'll offset to the true_lb to find the address of the + * actual data, as pUserBuf by itself isn't a meaningful address + * when used with MPI datatypes. But if convertor is null we'll just + * use pUserBuf directly. 
+ */ + if (NULL != convertor) { + dbuf = (CUdeviceptr)((char*)pUserBuf + convertor->pDesc->true_lb); + /* Only the first data byte (pUserBuf + true_lb) is examined. + * count and true_ub are not used to locate the top byte or + * any interior bytes, because it is unclear what this function + * should report if the MPI datatype spanned multiple types of + * memory. Such datatypes are easy to construct, so this + * should be documented as not allowed. + */ + } + #if OPAL_CUDA_GET_ATTRIBUTES uint32_t isManaged = 0; /* With CUDA 7.0, we can get multiple attributes with a single call */ @@ -2102,6 +2122,10 @@ void mca_cuda_convertor_init(opal_convertor_t *convertor, const void *pUserBuf) /* Checks the type of pointer * * @param dest One pointer to check + * the buffers are the real address to check, eg if there + * was a userbuf and an MPI datatype involved, the argument + * passed in here should already be offset from userbuf to + * where the data is * @param source Another pointer to check */ bool opal_cuda_check_bufs(char *dest, char *src) @@ -2132,6 +2156,9 @@ bool opal_cuda_check_bufs(char *dest, char *src) /* Checks the type of pointer * * @param buf check one pointer providing a convertor. + * when a convertor is provided, the buf should be a userbuf + * so the convertor's datatype is used to locate the offset + * where data actually begins * Provides aditional information, e.g. managed vs. unmanaged GPU buffer */ bool opal_cuda_check_one_buf(char *buf, opal_convertor_t *convertor)