Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions config/opal_check_cuda.m4
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ AS_IF([test "$opal_check_cuda_happy" = "yes"],
# If we have CUDA support, check to see if we have support for SYNC_MEMOPS
# which was first introduced in CUDA 6.0.
AS_IF([test "$opal_check_cuda_happy"="yes"],
AC_CHECK_DECL([CU_POINTER_ATTRIBUTE_SYNC_MEMOPS], [CUDA_SYNC_MEMOPS=1], [CUDA_SYNC_MEMOPS=0],
[#include <$opal_cuda_incdir/cuda.h>]),
[AC_CHECK_DECL([CU_POINTER_ATTRIBUTE_SYNC_MEMOPS], [CUDA_SYNC_MEMOPS=1], [CUDA_SYNC_MEMOPS=0],
[#include <$opal_cuda_incdir/cuda.h>])],
[])

# If we have CUDA support, check to see if we have CUDA 6.0 or later.
Expand Down
11 changes: 7 additions & 4 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -1369,10 +1369,13 @@ OPAL_SETUP_WRAPPER_FINAL
# autoconf macro defines in mpi.h. Since AC sometimes changes whether
# things are defined as null tokens or an integer result, two projects
# with different versions of AC can cause problems.
if test $ac_cv_header_stdc = yes; then
AC_DEFINE(OPAL_STDC_HEADERS, 1,
[Do not use outside of mpi.h. Define to 1 if you have the ANSI C header files.])
fi

# According to the autoconf 2.67 documentation, the AC_HEADER_STDC macro,
# and therefore the ac_cv_header_stdc cache variable, are obsolescent, as
# current systems have conforming header files. Instead of removing the
# protection completely, let's just make sure it is always on.
AC_DEFINE(OPAL_STDC_HEADERS, 1,
[Do not use outside of mpi.h. Define to 1 if you have the ANSI C header files.])
if test $ac_cv_header_sys_time_h = yes ; then
AC_DEFINE(OPAL_HAVE_SYS_TIME_H, 1,
[Do not use outside of mpi.h. Define to 1 if you have the <sys/time.h> header file.])
Expand Down
3 changes: 2 additions & 1 deletion opal/datatype/opal_convertor.c
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,8 @@ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor )
pConvertor->remote_size = pConvertor->local_size;
if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) {
pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS);
if (!(pConvertor->flags & CONVERTOR_SEND && pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS)) {
/* Can we use the optimized description? */
if (pConvertor->flags & OPAL_DATATYPE_OPTIMIZED_RESTRICTED) {
pConvertor->use_desc = &(datatype->desc);
}
if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) {
Expand Down
17 changes: 12 additions & 5 deletions opal/datatype/opal_datatype.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,18 @@ BEGIN_C_DECLS
* We should make the difference here between the predefined contiguous and non contiguous
* datatypes. The OPAL_DATATYPE_FLAG_BASIC is held by all predefined contiguous datatypes.
*/
#define OPAL_DATATYPE_FLAG_BASIC (OPAL_DATATYPE_FLAG_PREDEFINED | \
OPAL_DATATYPE_FLAG_CONTIGUOUS | \
OPAL_DATATYPE_FLAG_NO_GAPS | \
OPAL_DATATYPE_FLAG_DATA | \
OPAL_DATATYPE_FLAG_COMMITTED)
#define OPAL_DATATYPE_FLAG_BASIC \
(OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS \
| OPAL_DATATYPE_FLAG_DATA | OPAL_DATATYPE_FLAG_COMMITTED)
/*
* If during the datatype optimization process we collapse contiguous elements with
* different types, we cannot use this optimized description for any communication
* in a heterogeneous setting, especially not for the external32 support.
*
* A datatype with this flag cannot use the optimized description in heterogeneous
* setups.
*/
#define OPAL_DATATYPE_OPTIMIZED_RESTRICTED 0x1000

/**
* The number of supported entries in the data-type definition and the
Expand Down
44 changes: 33 additions & 11 deletions opal/datatype/opal_datatype_dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,39 @@ int opal_datatype_contain_basic_datatypes( const opal_datatype_t* pData, char* p
int opal_datatype_dump_data_flags( unsigned short usflags, char* ptr, size_t length )
{
int index = 0;
if( length < 22 ) return 0;
index = snprintf( ptr, 22, "-----------[---][---]" ); /* set everything to - */
if( usflags & OPAL_DATATYPE_FLAG_COMMITTED ) ptr[1] = 'c';
if( usflags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) ptr[2] = 'C';
if( usflags & OPAL_DATATYPE_FLAG_OVERLAP ) ptr[3] = 'o';
if( usflags & OPAL_DATATYPE_FLAG_USER_LB ) ptr[4] = 'l';
if( usflags & OPAL_DATATYPE_FLAG_USER_UB ) ptr[5] = 'u';
if( usflags & OPAL_DATATYPE_FLAG_PREDEFINED ) ptr[6] = 'P';
if( !(usflags & OPAL_DATATYPE_FLAG_NO_GAPS) ) ptr[7] = 'G';
if( usflags & OPAL_DATATYPE_FLAG_DATA ) ptr[8] = 'D';
if( (usflags & OPAL_DATATYPE_FLAG_BASIC) == OPAL_DATATYPE_FLAG_BASIC ) ptr[9] = 'B';
if (length < 22) {
return 0;
}
index = snprintf(ptr, 22, "-----------[---][---]"); /* set everything to - */
if (usflags & OPAL_DATATYPE_FLAG_COMMITTED) {
ptr[1] = 'c';
}
if (usflags & OPAL_DATATYPE_FLAG_CONTIGUOUS) {
ptr[2] = 'C';
}
if (usflags & OPAL_DATATYPE_FLAG_OVERLAP) {
ptr[3] = 'o';
}
if (usflags & OPAL_DATATYPE_FLAG_USER_LB) {
ptr[4] = 'l';
}
if (usflags & OPAL_DATATYPE_FLAG_USER_UB) {
ptr[5] = 'u';
}
if (usflags & OPAL_DATATYPE_FLAG_PREDEFINED) {
ptr[6] = 'P';
}
if (!(usflags & OPAL_DATATYPE_FLAG_NO_GAPS)) {
ptr[7] = 'G';
}
if (usflags & OPAL_DATATYPE_FLAG_DATA) {
ptr[8] = 'D';
}
if ((usflags & OPAL_DATATYPE_FLAG_BASIC) == OPAL_DATATYPE_FLAG_BASIC) {
ptr[9] = 'B';
} else if (usflags & OPAL_DATATYPE_OPTIMIZED_RESTRICTED) {
ptr[9] = 'H'; /* optimized description restricted to homogeneous cases */
}
/* We know nothing about the upper level language or flags! */
/* ... */
return index;
Expand Down
51 changes: 8 additions & 43 deletions opal/datatype/opal_datatype_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,51 +36,16 @@

extern int opal_datatype_dfd;

# define DDT_DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME ) \
opal_datatype_dump_stack( (PSTACK), (STACK_POS), (PDESC), (NAME) )
# if defined(ACCEPT_C99)
# define DUMP( ARGS... ) opal_output(opal_datatype_dfd, __VA_ARGS__)
# else
# if defined(__GNUC__) && !defined(__STDC__)
# define DUMP(ARGS...) opal_output( opal_datatype_dfd, ARGS)
# else
static inline void DUMP( char* fmt, ... )
{
va_list list;
# define DDT_DUMP_STACK(PSTACK, STACK_POS, PDESC, NAME) \
opal_datatype_dump_stack((PSTACK), (STACK_POS), (PDESC), (NAME))

# define DUMP(...) opal_output(opal_datatype_dfd, __VA_ARGS__)

va_start( list, fmt );
opal_output_vverbose( 0, opal_datatype_dfd, fmt, list );
va_end( list );
}
# endif /* __GNUC__ && !__STDC__ */
# endif /* ACCEPT_C99 */
#else
# define DDT_DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME )
# if defined(ACCEPT_C99)
# define DUMP(ARGS...)
# else
# if defined(__GNUC__) && !defined(__STDC__)
# define DUMP(ARGS...)
# else
/* If we do not compile with PGI, mark the parameter as unused */
# if !defined(__PGI)
# define __opal_attribute_unused_tmp__ __opal_attribute_unused__
# else
# define __opal_attribute_unused_tmp__
# endif
static inline void DUMP( char* fmt __opal_attribute_unused_tmp__, ... )
{
#if defined(__PGI)
/* Some compilers complain if we have "..." arguments and no
corresponding va_start() */
va_list arglist;
va_start(arglist, fmt);
va_end(arglist);
#endif
}
# undef __opal_attribute_unused_tmp__
# endif /* __GNUC__ && !__STDC__ */
# endif /* ACCEPT_C99 */

# define DDT_DUMP_STACK(PSTACK, STACK_POS, PDESC, NAME)
# define DUMP(...)

#endif /* VERBOSE */


Expand Down
35 changes: 22 additions & 13 deletions opal/datatype/opal_datatype_optimize.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,12 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
compress.blocklen = pData->desc.desc[pos_desc + index].elem.blocklen;
for( uint32_t i = index+1; i < loop->items; i++ ) {
current = &pData->desc.desc[pos_desc + i].elem;
assert(1 == current->count);
if( (current->common.type == OPAL_DATATYPE_LOOP) ||
compress.common.type != current->common.type ) {
compress.common.type = OPAL_DATATYPE_UINT1;
assert(1 == current->count);
if ((current->common.type == OPAL_DATATYPE_LOOP)
|| compress.common.type != current->common.type) {
compress.common.type = OPAL_DATATYPE_UINT1;
compress.common.flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
pData->flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
compress.blocklen = end_loop->size;
break;
}
Expand Down Expand Up @@ -174,12 +176,14 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
/* are the two elements compatible: aka they have very similar values and they
* can be merged together by increasing the count, and/or changing the extent.
*/
if( (last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ==
(current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size) ) {
ddt_elem_desc_t save = last; /* safekeep the type and blocklen */
if( last.common.type != current->common.type ) {
last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size;
last.common.type = OPAL_DATATYPE_UINT1;
if ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size)
== (current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size)) {
ddt_elem_desc_t save = last; /* safekeep the type and blocklen */
if (last.common.type != current->common.type) {
last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size;
last.common.type = OPAL_DATATYPE_UINT1;
last.common.flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
pData->flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
}

if( (last.extent * (ptrdiff_t)last.count + last.disp) == current->disp ) {
Expand Down Expand Up @@ -225,9 +229,14 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
if( last.common.type == current->common.type ) {
last.blocklen += current->blocklen;
} else {
last.blocklen = ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) +
(current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size));
last.common.type = OPAL_DATATYPE_UINT1;
last.blocklen = ((last.blocklen
* opal_datatype_basicDatatypes[last.common.type]->size)
+ (current->blocklen
* opal_datatype_basicDatatypes[current->common.type]
->size));
last.common.type = OPAL_DATATYPE_UINT1;
last.common.flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
pData->flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
}
last.extent += current->extent;
if( current->count != 1 ) {
Expand Down
107 changes: 74 additions & 33 deletions opal/datatype/opal_datatype_pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -383,42 +383,83 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor,
*/

static inline void
pack_predefined_heterogeneous( opal_convertor_t* CONVERTOR,
const dt_elem_desc_t* ELEM,
size_t* COUNT,
unsigned char** SOURCE,
unsigned char** DESTINATION,
size_t* SPACE )
pack_predefined_heterogeneous(opal_convertor_t *CONVERTOR,
const dt_elem_desc_t *ELEM, size_t *COUNT,
unsigned char **memory,
unsigned char **packed, size_t *SPACE)
{
const opal_convertor_master_t* master = (CONVERTOR)->master;
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
unsigned char* _source = (*SOURCE) + _elem->disp;
ptrdiff_t advance;
size_t _count = *(COUNT);
size_t _r_blength;

_r_blength = master->remote_sizes[_elem->common.type];
if( (_count * _r_blength) > *(SPACE) ) {
_count = (*(SPACE) / _r_blength);
if( 0 == _count ) return; /* nothing to do */
const opal_convertor_master_t *master = (CONVERTOR)->master;
const ddt_elem_desc_t *_elem = &((ELEM)->elem);
size_t cando_count = *(COUNT), do_now_bytes;
size_t local_elem_size = opal_datatype_basicDatatypes[_elem->common.type]->size;
size_t remote_elem_size = master->remote_sizes[_elem->common.type];
size_t blocklen_bytes = remote_elem_size;
unsigned char *_memory = (*memory) + _elem->disp;
unsigned char *_packed = *packed;
ptrdiff_t advance = 0;

assert(0 == (cando_count % _elem->blocklen)); /* no partials here */
assert(*(COUNT) <= ((size_t) _elem->count * _elem->blocklen));

if ((remote_elem_size * cando_count) > *(SPACE))
cando_count = (*SPACE) / blocklen_bytes;

/* preemptively update the COUNT we will return to the caller. */
*(COUNT) -= cando_count;

if (_elem->blocklen == 1) {
master->pFunctions[_elem->common.type](CONVERTOR, cando_count,
_memory, *SPACE, _elem->extent,
_packed, *SPACE, remote_elem_size,
&advance);
_memory += cando_count * _elem->extent;
_packed += cando_count * remote_elem_size;
goto update_and_return;
}

OPAL_DATATYPE_SAFEGUARD_POINTER( _source, (_count * _elem->extent), (CONVERTOR)->pBaseBuf,
(CONVERTOR)->pDesc, (CONVERTOR)->count );
DO_DEBUG( opal_output( 0, "pack [l %s r %s] memcpy( %p, %p, %lu ) => space %lu\n",
((ptrdiff_t)(opal_datatype_basicDatatypes[_elem->common.type]->size) == _elem->extent) ? "cont" : "----",
((ptrdiff_t)_r_blength == _elem->extent) ? "cont" : "----",
(void*)*(DESTINATION), (void*)_source, (unsigned long)_r_blength,
(unsigned long)(*(SPACE)) ); );
master->pFunctions[_elem->common.type]( CONVERTOR, _count,
_source, *SPACE, _elem->extent,
*DESTINATION, *SPACE, _r_blength,
&advance );
_r_blength *= _count; /* update the remote length to encompass all the elements */
*(SOURCE) += _count * _elem->extent;
*(DESTINATION) += _r_blength;
*(SPACE) -= _r_blength;
*(COUNT) -= _count;
if ((1 < _elem->count) && (_elem->blocklen <= cando_count)) {
blocklen_bytes = remote_elem_size * _elem->blocklen;

do { /* Do as many full blocklen as possible */
OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, blocklen_bytes, (CONVERTOR)->pBaseBuf,
(CONVERTOR)->pDesc, (CONVERTOR)->count);
DO_DEBUG(opal_output(0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
(void *) _packed, (void *) _memory, (unsigned long) blocklen_bytes,
(unsigned long) (*(SPACE) - (_packed - *(packed)))););
master->pFunctions[_elem->common.type](CONVERTOR, _elem->blocklen,
_memory, *SPACE, local_elem_size,
_packed, *SPACE, remote_elem_size,
&advance);
_packed += blocklen_bytes;
_memory += _elem->extent;
cando_count -= _elem->blocklen;
} while (_elem->blocklen <= cando_count);
}

/**
* As an epilog do anything left from the last blocklen.
*/
if (0 != cando_count) {
assert((cando_count < _elem->blocklen)
|| ((1 == _elem->count) && (cando_count <= _elem->blocklen)));
do_now_bytes = cando_count * remote_elem_size;
OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
(CONVERTOR)->pDesc, (CONVERTOR)->count);
DO_DEBUG(opal_output(0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
(void *) _packed, (void *) _memory, (unsigned long) do_now_bytes,
(unsigned long) (*(SPACE) - (_packed - *(packed)))););
master->pFunctions[_elem->common.type](CONVERTOR, cando_count,
_memory, *SPACE, local_elem_size,
_packed, *SPACE, remote_elem_size,
&advance);
_memory += do_now_bytes;
_packed += do_now_bytes;
}

update_and_return:
*(memory) = _memory - _elem->disp;
*(SPACE) -= (_packed - *packed);
*(packed) = _packed;
}

int32_t
Expand Down
Loading