From cf3562c0db6e35b11ffe233553024a7d7f2be9c8 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 17 Feb 2016 16:00:38 +0900 Subject: [PATCH 1/2] datatype: correctly support MPI_Pack_external when heterogeneous support is enabled --- ompi/mpi/c/pack_external.c | 10 +- opal/datatype/opal_convertor.c | 31 +++- opal/datatype/opal_convertor.h | 24 ++++ opal/datatype/opal_datatype_pack.c | 174 ++++++++++++++++++++++- opal/datatype/opal_datatype_prototypes.h | 6 + 5 files changed, 238 insertions(+), 7 deletions(-) diff --git a/ompi/mpi/c/pack_external.c b/ompi/mpi/c/pack_external.c index 5ced6182b09..be76f2bb7d3 100644 --- a/ompi/mpi/c/pack_external.c +++ b/ompi/mpi/c/pack_external.c @@ -13,7 +13,7 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -78,10 +78,10 @@ int MPI_Pack_external(const char datarep[], const void *inbuf, int incount, * CONVERTOR_SEND_CONVERSION in order to force the convertor to do anything * more than just packing the data. */ - opal_convertor_copy_and_prepare_for_send( ompi_mpi_external32_convertor, - &(datatype->super), incount, (void *) inbuf, - CONVERTOR_SEND_CONVERSION, - &local_convertor ); + opal_convertor_copy_and_prepare_for_send_external( ompi_mpi_external32_convertor, + &(datatype->super), incount, (void *) inbuf, + CONVERTOR_SEND_CONVERSION, + &local_convertor ); /* Check for truncation */ opal_convertor_get_packed_size( &local_convertor, &size ); diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index d5481283183..2feedaba48e 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. 
All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2013 Research Organization for Information Science + * Copyright (c) 2013-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -628,6 +628,35 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor, return OPAL_SUCCESS; } +int32_t opal_convertor_prepare_for_send_external( opal_convertor_t* convertor, + const struct opal_datatype_t* datatype, + int32_t count, + const void* pUserBuf ) +{ +#if OPAL_CUDA_SUPPORT + mca_cuda_convertor_init(convertor, pUserBuf); +#endif + + OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); + convertor->flags |= CONVERTOR_SEND; + +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT + if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { + convertor->fAdvance = opal_pack_general; + } else +#endif + if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { + if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size) + || (1 >= convertor->count) ) + convertor->fAdvance = opal_pack_homogeneous_contig; + else + convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps; + } else { + convertor->fAdvance = opal_generic_simple_pack; + } + return OPAL_SUCCESS; +} + /* * These functions can be used in order to create an IDENTICAL copy of one convertor. In this * context IDENTICAL means that the datatype and count and all other properties of the basic diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h index 5b26b7e7d63..96855480dfe 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -249,6 +251,28 @@ static inline int32_t opal_convertor_copy_and_prepare_for_send( const opal_conve return opal_convertor_prepare_for_send( convertor, datatype, count, pUserBuf ); } +/* + * + */ +OPAL_DECLSPEC int32_t opal_convertor_prepare_for_send_external( opal_convertor_t* convertor, + const struct opal_datatype_t* datatype, + int32_t count, + const void* pUserBuf); + +static inline int32_t opal_convertor_copy_and_prepare_for_send_external( const opal_convertor_t* pSrcConv, + const struct opal_datatype_t* datatype, + int32_t count, + const void* pUserBuf, + int32_t flags, + opal_convertor_t* convertor ) +{ + convertor->remoteArch = pSrcConv->remoteArch; + convertor->flags = pSrcConv->flags | flags; + convertor->master = pSrcConv->master; + + return opal_convertor_prepare_for_send_external( convertor, datatype, count, pUserBuf ); +} + /* * */ diff --git a/opal/datatype/opal_datatype_pack.c b/opal/datatype/opal_datatype_pack.c index 45f1213b811..1edc2af4f3f 100644 --- a/opal/datatype/opal_datatype_pack.c +++ b/opal/datatype/opal_datatype_pack.c @@ -11,7 +11,9 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -393,3 +395,173 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); return 0; } + +#if ! defined(CHECKSUM) +/* + * Remember that the first item in the stack (ie. position 0) is the number + * of times the datatype is involved in the operation (ie. the count argument + * in the MPI_ call). 
+ */ +/* Convert data from multiple input buffers (as received from the network layer) + * to a contiguous output buffer with a predefined size. + * return OPAL_SUCCESS if everything went OK and if there is still room before the complete + * conversion of the data (need additional call with other input buffers ) + * 1 if everything went fine and the data was completely converted + * -1 if something went wrong. + */ +int32_t +opal_pack_general( opal_convertor_t* pConvertor, + struct iovec* iov, uint32_t* out_size, + size_t* max_data ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ + uint32_t pos_desc; /* actual position in the description of the derived datatype */ + uint32_t count_desc; /* the number of items already done in the actual pos_desc */ + uint16_t type = OPAL_DATATYPE_MAX_PREDEFINED; /* type at current position */ + size_t total_unpacked = 0; /* total size unpacked this time */ + dt_elem_desc_t* description; + dt_elem_desc_t* pElem; + const opal_datatype_t *pData = pConvertor->pDesc; + unsigned char *conv_ptr, *iov_ptr; + size_t iov_len_local; + uint32_t iov_count; + + const opal_convertor_master_t* master = pConvertor->master; + OPAL_PTRDIFF_TYPE advance; /* number of bytes that we should advance the buffer */ + int32_t rc; + + DO_DEBUG( opal_output( 0, "opal_convertor_general_pack( %p, {%p, %lu}, %u )\n", + (void*)pConvertor, iov[0].iov_base, (unsigned long)iov[0].iov_len, *out_size ); ); + + description = pConvertor->use_desc->desc; + + /* For the first step we have to add both displacement to the source. After in the + * main while loop we will set back the source_base to the correct value. 
This is + * due to the fact that the convertor can stop in the middle of a data with a count + */ + pStack = pConvertor->pStack + pConvertor->stack_pos; + pos_desc = pStack->index; + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + count_desc = (uint32_t)pStack->count; + pStack--; + pConvertor->stack_pos--; + pElem = &(description[pos_desc]); + + DO_DEBUG( opal_output( 0, "unpack start pos_desc %d count_desc %d disp %ld\n" + "stack_pos %d pos_desc %d count_desc %d disp %ld\n", + pos_desc, count_desc, (long)(conv_ptr - pConvertor->pBaseBuf), + pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)(pStack->disp) ); ); + + for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { + iov_ptr = (unsigned char *) iov[iov_count].iov_base; + iov_len_local = iov[iov_count].iov_len; + assert( 0 == pConvertor->partial_length ); + while( 1 ) { + while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + /* now here we have a basic datatype */ + type = description[pos_desc].elem.common.type; + OPAL_DATATYPE_SAFEGUARD_POINTER( conv_ptr + pElem->elem.disp, pData->size, pConvertor->pBaseBuf, + pData, pConvertor->count ); + DO_DEBUG( opal_output( 0, "unpack (%p:%ld, %d, %ld) -> (%p, %ld) type %s\n", + pConvertor->pBaseBuf, conv_ptr + pElem->elem.disp - pConvertor->pBaseBuf, + count_desc, description[pos_desc].elem.extent, + iov_ptr, iov_len_local, + opal_datatype_basicDatatypes[type]->name ); ); + + rc = master->pFunctions[type]( pConvertor, count_desc, + conv_ptr + pElem->elem.disp, + (pConvertor->pDesc->ub - pConvertor->pDesc->lb) * pConvertor->count, + description[pos_desc].elem.extent, + iov_ptr, iov_len_local, opal_datatype_basicDatatypes[type]->size, + &advance ); + iov_len_local -= advance; /* decrease the available space in the buffer */ + iov_ptr += advance; /* increase the pointer to the buffer */ + count_desc -= rc; /* compute leftovers */ + if( 0 == count_desc ) { /* completed */ + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + pos_desc++; /* 
advance to the next data */ + UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); + continue; + } + conv_ptr += rc * description[pos_desc].elem.extent; + assert( pElem->elem.common.type < OPAL_DATATYPE_MAX_PREDEFINED ); + assert( 0 == iov_len_local ); +#if 0 + if( 0 != iov_len_local ) { + unsigned char* temp = conv_ptr; + /* We have some partial data here. Let's copy it into the convertor + * and keep it hot until the next round. + */ + assert( iov_len_local < opal_datatype_basicDatatypes[pElem->elem.common.type]->size ); + COMPUTE_CSUM( iov_ptr, iov_len_local, pConvertor ); + + opal_unpack_partial_datatype( pConvertor, pElem, + iov_ptr, 0, iov_len_local, + &temp ); + + pConvertor->partial_length = (uint32_t)iov_len_local; + iov_len_local = 0; + } +#endif + goto complete_loop; + } + if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ + DO_DEBUG( opal_output( 0, "unpack end_loop count %d stack_pos %d pos_desc %d disp %ld space %lu\n", + (int)pStack->count, pConvertor->stack_pos, pos_desc, + (long)pStack->disp, (unsigned long)iov_len_local ); ); + if( --(pStack->count) == 0 ) { /* end of loop */ + if( 0 == pConvertor->stack_pos ) { + /* Do the same thing as when the loop is completed */ + iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ + total_unpacked += iov[iov_count].iov_len; + iov_count++; /* go to the next */ + goto complete_conversion; + } + pConvertor->stack_pos--; + pStack--; + pos_desc++; + } else { + pos_desc = pStack->index + 1; + if( pStack->index == -1 ) { + pStack->disp += (pData->ub - pData->lb); + } else { + assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type ); + pStack->disp += description[pStack->index].loop.extent; + } + } + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); + DO_DEBUG( opal_output( 0, "unpack new_loop count %d stack_pos %d pos_desc %d disp %ld space %lu\n", 
+ (int)pStack->count, pConvertor->stack_pos, pos_desc, + (long)pStack->disp, (unsigned long)iov_len_local ); ); + } + if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { + PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, + pStack->disp ); + pos_desc++; + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); + DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" ); + continue; + } + } + complete_loop: + iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ + total_unpacked += iov[iov_count].iov_len; + } + complete_conversion: + *max_data = total_unpacked; + pConvertor->bConverted += total_unpacked; /* update the already converted bytes */ + *out_size = iov_count; + if( pConvertor->bConverted == pConvertor->remote_size ) { + pConvertor->flags |= CONVERTOR_COMPLETED; + return 1; + } + /* Save the global position for the next round */ + PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc, + conv_ptr - pConvertor->pBaseBuf ); + DO_DEBUG( opal_output( 0, "unpack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n", + pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); ); + return 0; +} +#endif diff --git a/opal/datatype/opal_datatype_prototypes.h b/opal/datatype/opal_datatype_prototypes.h index bcfb59b9b31..8f9db2e1718 100644 --- a/opal/datatype/opal_datatype_prototypes.h +++ b/opal/datatype/opal_datatype_prototypes.h @@ -4,6 +4,8 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2016 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,6 +34,10 @@ opal_unpack_general_checksum( opal_convertor_t* pConvertor, struct iovec* iov, uint32_t* out_size, size_t* max_data ); +OPAL_DECLSPEC int32_t +opal_pack_general( opal_convertor_t* pConvertor, + struct iovec* iov, uint32_t* out_size, + size_t* max_data ); /* * Now the internal functions */ From 7c10edbd72c9c95a08e18140633fef5bc4a443d1 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 18 Feb 2016 13:35:49 +0900 Subject: [PATCH 2/2] datatype: correctly support MPI_Pack_external also when heterogeneous support is not enabled --- ompi/mpi/c/unpack_external.c | 6 ++-- opal/datatype/opal_convertor.c | 61 ++++++++++++++++++++++------------ opal/datatype/opal_convertor.h | 21 ++++++++++++ 3 files changed, 63 insertions(+), 25 deletions(-) diff --git a/ompi/mpi/c/unpack_external.c b/ompi/mpi/c/unpack_external.c index 9a312e884e3..d74f20d69fb 100644 --- a/ompi/mpi/c/unpack_external.c +++ b/ompi/mpi/c/unpack_external.c @@ -13,7 +13,7 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -72,8 +72,8 @@ int MPI_Unpack_external (const char datarep[], const void *inbuf, MPI_Aint insiz OBJ_CONSTRUCT(&local_convertor, opal_convertor_t); /* the resulting convertor will be set to the position ZERO */ - opal_convertor_copy_and_prepare_for_recv( ompi_mpi_external32_convertor, - &(datatype->super), outcount, outbuf, 0, &local_convertor ); + opal_convertor_copy_and_prepare_for_recv_external( ompi_mpi_external32_convertor, + &(datatype->super), outcount, outbuf, 0, &local_convertor ); /* Check for truncation */ opal_convertor_get_packed_size( &local_convertor, &size ); diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 2feedaba48e..0b6120a529f 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -451,7 +451,6 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, /** * Compute the remote size. */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT #define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \ { \ if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \ @@ -472,13 +471,6 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, convertor->use_desc = &(datatype->desc); \ } \ } -#else -#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \ -{ \ - assert(0 == (bdt_mask)); \ - (void)bdt_mask; /* silence compiler warning */ \ -} -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */ /** * This macro will initialize a convertor based on a previously created @@ -592,6 +584,32 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor, } +int32_t opal_convertor_prepare_for_recv_external( opal_convertor_t* convertor, + const struct opal_datatype_t* datatype, + int32_t count, + const void* pUserBuf ) +{ + /* Here I should check that the data is not overlapping */ + + convertor->flags |= CONVERTOR_RECV; +#if OPAL_CUDA_SUPPORT + mca_cuda_convertor_init(convertor, pUserBuf); +#endif + + OPAL_CONVERTOR_PREPARE( convertor, 
datatype, count, pUserBuf ); + + assert(! (convertor->flags & CONVERTOR_WITH_CHECKSUM)); + if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { + convertor->fAdvance = opal_unpack_general; + } else if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { + convertor->fAdvance = opal_unpack_homogeneous_contig; + } else { + convertor->fAdvance = opal_generic_simple_unpack; + } + return OPAL_SUCCESS; +} + + int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor, const struct opal_datatype_t* datatype, int32_t count, @@ -640,20 +658,19 @@ int32_t opal_convertor_prepare_for_send_external( opal_convertor_t* convertor, OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); convertor->flags |= CONVERTOR_SEND; -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { - convertor->fAdvance = opal_pack_general; - } else -#endif - if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size) - || (1 >= convertor->count) ) - convertor->fAdvance = opal_pack_homogeneous_contig; - else - convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps; - } else { - convertor->fAdvance = opal_generic_simple_pack; - } + assert(! 
(convertor->flags & CONVERTOR_WITH_CHECKSUM)); + + if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { + convertor->fAdvance = opal_pack_general; + } else if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { + if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size) + || (1 >= convertor->count) ) + convertor->fAdvance = opal_pack_homogeneous_contig; + else + convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps; + } else { + convertor->fAdvance = opal_generic_simple_pack; + } return OPAL_SUCCESS; } diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h index 96855480dfe..2060d106aea 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -294,6 +294,27 @@ static inline int32_t opal_convertor_copy_and_prepare_for_recv( const opal_conve return opal_convertor_prepare_for_recv( convertor, datatype, count, pUserBuf ); } +/* + * + */ +OPAL_DECLSPEC int32_t opal_convertor_prepare_for_recv_external( opal_convertor_t* convertor, + const struct opal_datatype_t* datatype, + int32_t count, + const void* pUserBuf ); +static inline int32_t opal_convertor_copy_and_prepare_for_recv_external( const opal_convertor_t* pSrcConv, + const struct opal_datatype_t* datatype, + int32_t count, + const void* pUserBuf, + int32_t flags, + opal_convertor_t* convertor ) +{ + convertor->remoteArch = pSrcConv->remoteArch; + convertor->flags = (pSrcConv->flags | flags); + convertor->master = pSrcConv->master; + + return opal_convertor_prepare_for_recv_external( convertor, datatype, count, pUserBuf ); +} + /* * Give access to the raw memory layout based on the datatype. */